granicus.if.org Git - libvpx/blob - vp9/encoder/vp9_encoder.c

   1 /*
   2  * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <limits.h>
  12 #include <math.h>
  13 #include <stdio.h>
  14 #include <stdlib.h>
  15
  16 #include "./vp9_rtcd.h"
  17 #include "./vpx_config.h"
  18 #include "./vpx_dsp_rtcd.h"
  19 #include "./vpx_scale_rtcd.h"
  20 #include "vpx_dsp/psnr.h"
  21 #include "vpx_dsp/vpx_dsp_common.h"
  22 #include "vpx_dsp/vpx_filter.h"
  23 #if CONFIG_INTERNAL_STATS
  24 #include "vpx_dsp/ssim.h"
  25 #endif
  26 #include "vpx_ports/mem.h"
  27 #include "vpx_ports/system_state.h"
  28 #include "vpx_ports/vpx_timer.h"
  29 #if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
  30 #include "vpx_util/vpx_debug_util.h"
  31 #endif  // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
  32
  33 #include "vp9/common/vp9_alloccommon.h"
  34 #include "vp9/common/vp9_filter.h"
  35 #include "vp9/common/vp9_idct.h"
  36 #if CONFIG_NON_GREEDY_MV
  37 #include "vp9/common/vp9_mvref_common.h"
  38 #endif
  39 #if CONFIG_VP9_POSTPROC
  40 #include "vp9/common/vp9_postproc.h"
  41 #endif
  42 #include "vp9/common/vp9_reconinter.h"
  43 #include "vp9/common/vp9_reconintra.h"
  44 #include "vp9/common/vp9_tile_common.h"
  45 #include "vp9/common/vp9_scan.h"
  46
  47 #if !CONFIG_REALTIME_ONLY
  48 #include "vp9/encoder/vp9_alt_ref_aq.h"
  49 #include "vp9/encoder/vp9_aq_360.h"
  50 #include "vp9/encoder/vp9_aq_complexity.h"
  51 #endif
  52 #include "vp9/encoder/vp9_aq_cyclicrefresh.h"
  53 #if !CONFIG_REALTIME_ONLY
  54 #include "vp9/encoder/vp9_aq_variance.h"
  55 #endif
  56 #include "vp9/encoder/vp9_bitstream.h"
  57 #if CONFIG_INTERNAL_STATS
  58 #include "vp9/encoder/vp9_blockiness.h"
  59 #endif
  60 #include "vp9/encoder/vp9_context_tree.h"
  61 #include "vp9/encoder/vp9_encodeframe.h"
  62 #include "vp9/encoder/vp9_encodemb.h"
  63 #include "vp9/encoder/vp9_encodemv.h"
  64 #include "vp9/encoder/vp9_encoder.h"
  65 #include "vp9/encoder/vp9_ethread.h"
  66 #include "vp9/encoder/vp9_extend.h"
  67 #include "vp9/encoder/vp9_firstpass.h"
  68 #include "vp9/encoder/vp9_mbgraph.h"
  69 #if CONFIG_NON_GREEDY_MV
  70 #include "vp9/encoder/vp9_mcomp.h"
  71 #endif
  72 #include "vp9/encoder/vp9_multi_thread.h"
  73 #include "vp9/encoder/vp9_noise_estimate.h"
  74 #include "vp9/encoder/vp9_picklpf.h"
  75 #include "vp9/encoder/vp9_ratectrl.h"
  76 #include "vp9/encoder/vp9_rd.h"
  77 #include "vp9/encoder/vp9_resize.h"
  78 #include "vp9/encoder/vp9_segmentation.h"
  79 #include "vp9/encoder/vp9_skin_detection.h"
  80 #include "vp9/encoder/vp9_speed_features.h"
  81 #include "vp9/encoder/vp9_svc_layercontext.h"
  82 #include "vp9/encoder/vp9_temporal_filter.h"
  83
  84 #define AM_SEGMENT_ID_INACTIVE 7
  85 #define AM_SEGMENT_ID_ACTIVE 0
  86
  87 // Whether to use high precision mv for altref computation.
  88 #define ALTREF_HIGH_PRECISION_MV 1
  89
  90 // Q threshold for high precision mv. Choose a very high value for now so that
  91 // HIGH_PRECISION is always chosen.
  92 #define HIGH_PRECISION_MV_QTHRESH 200
  93
  94 #define FRAME_SIZE_FACTOR 128  // empirical params for context model threshold
  95 #define FRAME_RATE_FACTOR 8
  96
  97 #ifdef OUTPUT_YUV_DENOISED
  98 FILE *yuv_denoised_file = NULL;
  99 #endif
 100 #ifdef OUTPUT_YUV_SKINMAP
 101 static FILE *yuv_skinmap_file = NULL;
 102 #endif
 103 #ifdef OUTPUT_YUV_REC
 104 FILE *yuv_rec_file;
 105 #endif
 106 #ifdef OUTPUT_YUV_SVC_SRC
 107 FILE *yuv_svc_src[3] = { NULL, NULL, NULL };
 108 #endif
 109
 110 #if 0
 111 FILE *framepsnr;
 112 FILE *kf_list;
 113 FILE *keyfile;
 114 #endif
 115
 116 #ifdef ENABLE_KF_DENOISE
 117 // Test condition for spatial denoise of source.
 118 static int is_spatial_denoise_enabled(VP9_COMP *cpi) {
 119   VP9_COMMON *const cm = &cpi->common;
 120   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 121
 122   return (oxcf->pass != 1) && !is_lossless_requested(&cpi->oxcf) &&
 123          frame_is_intra_only(cm);
 124 }
 125 #endif
 126
 127 #if CONFIG_VP9_HIGHBITDEPTH
 128 void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
 129                          TX_SIZE tx_size);
 130 #endif
 131 void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
 132                   TX_SIZE tx_size);
 133
 134 #if !CONFIG_REALTIME_ONLY
 135 // compute adaptive threshold for skip recoding
 136 static int compute_context_model_thresh(const VP9_COMP *const cpi) {
 137   const VP9_COMMON *const cm = &cpi->common;
 138   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 139   const int frame_size = (cm->width * cm->height) >> 10;
 140   const int bitrate = (int)(oxcf->target_bandwidth >> 10);
 141   const int qindex_factor = cm->base_qindex + (MAXQ >> 1);
 142
 143   // This equation makes the threshold adaptive to frame size.
 144   // Coding gain obtained by recoding comes from alternate frames of large
 145   // content change. We skip recoding if the difference of previous and current
 146   // frame context probability model is less than a certain threshold.
 147   // The first component is the most critical part to guarantee adaptivity.
 148   // Other parameters are estimated based on normal setting of hd resolution
 149   // parameters. e.g frame_size = 1920x1080, bitrate = 8000, qindex_factor < 50
 150   const int thresh =
 151       ((FRAME_SIZE_FACTOR * frame_size - FRAME_RATE_FACTOR * bitrate) *
 152        qindex_factor) >>
 153       9;
 154
 155   return thresh;
 156 }
 157
 158 // compute the total cost difference between current
 159 // and previous frame context prob model.
 160 static int compute_context_model_diff(const VP9_COMMON *const cm) {
 161   const FRAME_CONTEXT *const pre_fc =
 162       &cm->frame_contexts[cm->frame_context_idx];
 163   const FRAME_CONTEXT *const cur_fc = cm->fc;
 164   const FRAME_COUNTS *counts = &cm->counts;
 165   vpx_prob pre_last_prob, cur_last_prob;
 166   int diff = 0;
 167   int i, j, k, l, m, n;
 168
 169   // y_mode_prob
 170   for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
 171     for (j = 0; j < INTRA_MODES - 1; ++j) {
 172       diff += (int)counts->y_mode[i][j] *
 173               (pre_fc->y_mode_prob[i][j] - cur_fc->y_mode_prob[i][j]);
 174     }
 175     pre_last_prob = MAX_PROB - pre_fc->y_mode_prob[i][INTRA_MODES - 2];
 176     cur_last_prob = MAX_PROB - cur_fc->y_mode_prob[i][INTRA_MODES - 2];
 177
 178     diff += (int)counts->y_mode[i][INTRA_MODES - 1] *
 179             (pre_last_prob - cur_last_prob);
 180   }
 181
 182   // uv_mode_prob
 183   for (i = 0; i < INTRA_MODES; ++i) {
 184     for (j = 0; j < INTRA_MODES - 1; ++j) {
 185       diff += (int)counts->uv_mode[i][j] *
 186               (pre_fc->uv_mode_prob[i][j] - cur_fc->uv_mode_prob[i][j]);
 187     }
 188     pre_last_prob = MAX_PROB - pre_fc->uv_mode_prob[i][INTRA_MODES - 2];
 189     cur_last_prob = MAX_PROB - cur_fc->uv_mode_prob[i][INTRA_MODES - 2];
 190
 191     diff += (int)counts->uv_mode[i][INTRA_MODES - 1] *
 192             (pre_last_prob - cur_last_prob);
 193   }
 194
 195   // partition_prob
 196   for (i = 0; i < PARTITION_CONTEXTS; ++i) {
 197     for (j = 0; j < PARTITION_TYPES - 1; ++j) {
 198       diff += (int)counts->partition[i][j] *
 199               (pre_fc->partition_prob[i][j] - cur_fc->partition_prob[i][j]);
 200     }
 201     pre_last_prob = MAX_PROB - pre_fc->partition_prob[i][PARTITION_TYPES - 2];
 202     cur_last_prob = MAX_PROB - cur_fc->partition_prob[i][PARTITION_TYPES - 2];
 203
 204     diff += (int)counts->partition[i][PARTITION_TYPES - 1] *
 205             (pre_last_prob - cur_last_prob);
 206   }
 207
 208   // coef_probs
 209   for (i = 0; i < TX_SIZES; ++i) {
 210     for (j = 0; j < PLANE_TYPES; ++j) {
 211       for (k = 0; k < REF_TYPES; ++k) {
 212         for (l = 0; l < COEF_BANDS; ++l) {
 213           for (m = 0; m < BAND_COEFF_CONTEXTS(l); ++m) {
 214             for (n = 0; n < UNCONSTRAINED_NODES; ++n) {
 215               diff += (int)counts->coef[i][j][k][l][m][n] *
 216                       (pre_fc->coef_probs[i][j][k][l][m][n] -
 217                        cur_fc->coef_probs[i][j][k][l][m][n]);
 218             }
 219
 220             pre_last_prob =
 221                 MAX_PROB -
 222                 pre_fc->coef_probs[i][j][k][l][m][UNCONSTRAINED_NODES - 1];
 223             cur_last_prob =
 224                 MAX_PROB -
 225                 cur_fc->coef_probs[i][j][k][l][m][UNCONSTRAINED_NODES - 1];
 226
 227             diff += (int)counts->coef[i][j][k][l][m][UNCONSTRAINED_NODES] *
 228                     (pre_last_prob - cur_last_prob);
 229           }
 230         }
 231       }
 232     }
 233   }
 234
 235   // switchable_interp_prob
 236   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
 237     for (j = 0; j < SWITCHABLE_FILTERS - 1; ++j) {
 238       diff += (int)counts->switchable_interp[i][j] *
 239               (pre_fc->switchable_interp_prob[i][j] -
 240                cur_fc->switchable_interp_prob[i][j]);
 241     }
 242     pre_last_prob =
 243         MAX_PROB - pre_fc->switchable_interp_prob[i][SWITCHABLE_FILTERS - 2];
 244     cur_last_prob =
 245         MAX_PROB - cur_fc->switchable_interp_prob[i][SWITCHABLE_FILTERS - 2];
 246
 247     diff += (int)counts->switchable_interp[i][SWITCHABLE_FILTERS - 1] *
 248             (pre_last_prob - cur_last_prob);
 249   }
 250
 251   // inter_mode_probs
 252   for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
 253     for (j = 0; j < INTER_MODES - 1; ++j) {
 254       diff += (int)counts->inter_mode[i][j] *
 255               (pre_fc->inter_mode_probs[i][j] - cur_fc->inter_mode_probs[i][j]);
 256     }
 257     pre_last_prob = MAX_PROB - pre_fc->inter_mode_probs[i][INTER_MODES - 2];
 258     cur_last_prob = MAX_PROB - cur_fc->inter_mode_probs[i][INTER_MODES - 2];
 259
 260     diff += (int)counts->inter_mode[i][INTER_MODES - 1] *
 261             (pre_last_prob - cur_last_prob);
 262   }
 263
 264   // intra_inter_prob
 265   for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
 266     diff += (int)counts->intra_inter[i][0] *
 267             (pre_fc->intra_inter_prob[i] - cur_fc->intra_inter_prob[i]);
 268
 269     pre_last_prob = MAX_PROB - pre_fc->intra_inter_prob[i];
 270     cur_last_prob = MAX_PROB - cur_fc->intra_inter_prob[i];
 271
 272     diff += (int)counts->intra_inter[i][1] * (pre_last_prob - cur_last_prob);
 273   }
 274
 275   // comp_inter_prob
 276   for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
 277     diff += (int)counts->comp_inter[i][0] *
 278             (pre_fc->comp_inter_prob[i] - cur_fc->comp_inter_prob[i]);
 279
 280     pre_last_prob = MAX_PROB - pre_fc->comp_inter_prob[i];
 281     cur_last_prob = MAX_PROB - cur_fc->comp_inter_prob[i];
 282
 283     diff += (int)counts->comp_inter[i][1] * (pre_last_prob - cur_last_prob);
 284   }
 285
 286   // single_ref_prob
 287   for (i = 0; i < REF_CONTEXTS; ++i) {
 288     for (j = 0; j < 2; ++j) {
 289       diff += (int)counts->single_ref[i][j][0] *
 290               (pre_fc->single_ref_prob[i][j] - cur_fc->single_ref_prob[i][j]);
 291
 292       pre_last_prob = MAX_PROB - pre_fc->single_ref_prob[i][j];
 293       cur_last_prob = MAX_PROB - cur_fc->single_ref_prob[i][j];
 294
 295       diff +=
 296           (int)counts->single_ref[i][j][1] * (pre_last_prob - cur_last_prob);
 297     }
 298   }
 299
 300   // comp_ref_prob
 301   for (i = 0; i < REF_CONTEXTS; ++i) {
 302     diff += (int)counts->comp_ref[i][0] *
 303             (pre_fc->comp_ref_prob[i] - cur_fc->comp_ref_prob[i]);
 304
 305     pre_last_prob = MAX_PROB - pre_fc->comp_ref_prob[i];
 306     cur_last_prob = MAX_PROB - cur_fc->comp_ref_prob[i];
 307
 308     diff += (int)counts->comp_ref[i][1] * (pre_last_prob - cur_last_prob);
 309   }
 310
 311   // tx_probs
 312   for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
 313     // p32x32
 314     for (j = 0; j < TX_SIZES - 1; ++j) {
 315       diff += (int)counts->tx.p32x32[i][j] *
 316               (pre_fc->tx_probs.p32x32[i][j] - cur_fc->tx_probs.p32x32[i][j]);
 317     }
 318     pre_last_prob = MAX_PROB - pre_fc->tx_probs.p32x32[i][TX_SIZES - 2];
 319     cur_last_prob = MAX_PROB - cur_fc->tx_probs.p32x32[i][TX_SIZES - 2];
 320
 321     diff += (int)counts->tx.p32x32[i][TX_SIZES - 1] *
 322             (pre_last_prob - cur_last_prob);
 323
 324     // p16x16
 325     for (j = 0; j < TX_SIZES - 2; ++j) {
 326       diff += (int)counts->tx.p16x16[i][j] *
 327               (pre_fc->tx_probs.p16x16[i][j] - cur_fc->tx_probs.p16x16[i][j]);
 328     }
 329     pre_last_prob = MAX_PROB - pre_fc->tx_probs.p16x16[i][TX_SIZES - 3];
 330     cur_last_prob = MAX_PROB - cur_fc->tx_probs.p16x16[i][TX_SIZES - 3];
 331
 332     diff += (int)counts->tx.p16x16[i][TX_SIZES - 2] *
 333             (pre_last_prob - cur_last_prob);
 334
 335     // p8x8
 336     for (j = 0; j < TX_SIZES - 3; ++j) {
 337       diff += (int)counts->tx.p8x8[i][j] *
 338               (pre_fc->tx_probs.p8x8[i][j] - cur_fc->tx_probs.p8x8[i][j]);
 339     }
 340     pre_last_prob = MAX_PROB - pre_fc->tx_probs.p8x8[i][TX_SIZES - 4];
 341     cur_last_prob = MAX_PROB - cur_fc->tx_probs.p8x8[i][TX_SIZES - 4];
 342
 343     diff +=
 344         (int)counts->tx.p8x8[i][TX_SIZES - 3] * (pre_last_prob - cur_last_prob);
 345   }
 346
 347   // skip_probs
 348   for (i = 0; i < SKIP_CONTEXTS; ++i) {
 349     diff += (int)counts->skip[i][0] *
 350             (pre_fc->skip_probs[i] - cur_fc->skip_probs[i]);
 351
 352     pre_last_prob = MAX_PROB - pre_fc->skip_probs[i];
 353     cur_last_prob = MAX_PROB - cur_fc->skip_probs[i];
 354
 355     diff += (int)counts->skip[i][1] * (pre_last_prob - cur_last_prob);
 356   }
 357
 358   // mv
 359   for (i = 0; i < MV_JOINTS - 1; ++i) {
 360     diff += (int)counts->mv.joints[i] *
 361             (pre_fc->nmvc.joints[i] - cur_fc->nmvc.joints[i]);
 362   }
 363   pre_last_prob = MAX_PROB - pre_fc->nmvc.joints[MV_JOINTS - 2];
 364   cur_last_prob = MAX_PROB - cur_fc->nmvc.joints[MV_JOINTS - 2];
 365
 366   diff +=
 367       (int)counts->mv.joints[MV_JOINTS - 1] * (pre_last_prob - cur_last_prob);
 368
 369   for (i = 0; i < 2; ++i) {
 370     const nmv_component_counts *nmv_count = &counts->mv.comps[i];
 371     const nmv_component *pre_nmv_prob = &pre_fc->nmvc.comps[i];
 372     const nmv_component *cur_nmv_prob = &cur_fc->nmvc.comps[i];
 373
 374     // sign
 375     diff += (int)nmv_count->sign[0] * (pre_nmv_prob->sign - cur_nmv_prob->sign);
 376
 377     pre_last_prob = MAX_PROB - pre_nmv_prob->sign;
 378     cur_last_prob = MAX_PROB - cur_nmv_prob->sign;
 379
 380     diff += (int)nmv_count->sign[1] * (pre_last_prob - cur_last_prob);
 381
 382     // classes
 383     for (j = 0; j < MV_CLASSES - 1; ++j) {
 384       diff += (int)nmv_count->classes[j] *
 385               (pre_nmv_prob->classes[j] - cur_nmv_prob->classes[j]);
 386     }
 387     pre_last_prob = MAX_PROB - pre_nmv_prob->classes[MV_CLASSES - 2];
 388     cur_last_prob = MAX_PROB - cur_nmv_prob->classes[MV_CLASSES - 2];
 389
 390     diff += (int)nmv_count->classes[MV_CLASSES - 1] *
 391             (pre_last_prob - cur_last_prob);
 392
 393     // class0
 394     for (j = 0; j < CLASS0_SIZE - 1; ++j) {
 395       diff += (int)nmv_count->class0[j] *
 396               (pre_nmv_prob->class0[j] - cur_nmv_prob->class0[j]);
 397     }
 398     pre_last_prob = MAX_PROB - pre_nmv_prob->class0[CLASS0_SIZE - 2];
 399     cur_last_prob = MAX_PROB - cur_nmv_prob->class0[CLASS0_SIZE - 2];
 400
 401     diff += (int)nmv_count->class0[CLASS0_SIZE - 1] *
 402             (pre_last_prob - cur_last_prob);
 403
 404     // bits
 405     for (j = 0; j < MV_OFFSET_BITS; ++j) {
 406       diff += (int)nmv_count->bits[j][0] *
 407               (pre_nmv_prob->bits[j] - cur_nmv_prob->bits[j]);
 408
 409       pre_last_prob = MAX_PROB - pre_nmv_prob->bits[j];
 410       cur_last_prob = MAX_PROB - cur_nmv_prob->bits[j];
 411
 412       diff += (int)nmv_count->bits[j][1] * (pre_last_prob - cur_last_prob);
 413     }
 414
 415     // class0_fp
 416     for (j = 0; j < CLASS0_SIZE; ++j) {
 417       for (k = 0; k < MV_FP_SIZE - 1; ++k) {
 418         diff += (int)nmv_count->class0_fp[j][k] *
 419                 (pre_nmv_prob->class0_fp[j][k] - cur_nmv_prob->class0_fp[j][k]);
 420       }
 421       pre_last_prob = MAX_PROB - pre_nmv_prob->class0_fp[j][MV_FP_SIZE - 2];
 422       cur_last_prob = MAX_PROB - cur_nmv_prob->class0_fp[j][MV_FP_SIZE - 2];
 423
 424       diff += (int)nmv_count->class0_fp[j][MV_FP_SIZE - 1] *
 425               (pre_last_prob - cur_last_prob);
 426     }
 427
 428     // fp
 429     for (j = 0; j < MV_FP_SIZE - 1; ++j) {
 430       diff +=
 431           (int)nmv_count->fp[j] * (pre_nmv_prob->fp[j] - cur_nmv_prob->fp[j]);
 432     }
 433     pre_last_prob = MAX_PROB - pre_nmv_prob->fp[MV_FP_SIZE - 2];
 434     cur_last_prob = MAX_PROB - cur_nmv_prob->fp[MV_FP_SIZE - 2];
 435
 436     diff +=
 437         (int)nmv_count->fp[MV_FP_SIZE - 1] * (pre_last_prob - cur_last_prob);
 438
 439     // class0_hp
 440     diff += (int)nmv_count->class0_hp[0] *
 441             (pre_nmv_prob->class0_hp - cur_nmv_prob->class0_hp);
 442
 443     pre_last_prob = MAX_PROB - pre_nmv_prob->class0_hp;
 444     cur_last_prob = MAX_PROB - cur_nmv_prob->class0_hp;
 445
 446     diff += (int)nmv_count->class0_hp[1] * (pre_last_prob - cur_last_prob);
 447
 448     // hp
 449     diff += (int)nmv_count->hp[0] * (pre_nmv_prob->hp - cur_nmv_prob->hp);
 450
 451     pre_last_prob = MAX_PROB - pre_nmv_prob->hp;
 452     cur_last_prob = MAX_PROB - cur_nmv_prob->hp;
 453
 454     diff += (int)nmv_count->hp[1] * (pre_last_prob - cur_last_prob);
 455   }
 456
 457   return -diff;
 458 }
 459 #endif  // !CONFIG_REALTIME_ONLY
 460
 461 // Test for whether to calculate metrics for the frame.
 462 static int is_psnr_calc_enabled(VP9_COMP *cpi) {
 463   VP9_COMMON *const cm = &cpi->common;
 464   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 465
 466   return cpi->b_calculate_psnr && (oxcf->pass != 1) && cm->show_frame;
 467 }
 468
 469 /* clang-format off */
// Per-level limits, one row per VP9 level. vp9_get_level() scans this table
// from the first row and reports the first level whose limits are all met.
// NOTE(review): the four unlabeled trailing columns presumably correspond to
// compression_ratio, max_col_tiles, min_altref_distance and
// max_ref_frame_buffers (the remaining fields compared in vp9_get_level) —
// confirm against the Vp9LevelSpec declaration.
const Vp9LevelSpec vp9_level_defs[VP9_LEVELS] = {
  //         sample rate    size   breadth  bitrate  cpb
  { LEVEL_1,   829440,      36864,    512,   200,    400,    2, 1,  4,  8 },
  { LEVEL_1_1, 2764800,     73728,    768,   800,    1000,   2, 1,  4,  8 },
  { LEVEL_2,   4608000,     122880,   960,   1800,   1500,   2, 1,  4,  8 },
  { LEVEL_2_1, 9216000,     245760,   1344,  3600,   2800,   2, 2,  4,  8 },
  { LEVEL_3,   20736000,    552960,   2048,  7200,   6000,   2, 4,  4,  8 },
  { LEVEL_3_1, 36864000,    983040,   2752,  12000,  10000,  2, 4,  4,  8 },
  { LEVEL_4,   83558400,    2228224,  4160,  18000,  16000,  4, 4,  4,  8 },
  { LEVEL_4_1, 160432128,   2228224,  4160,  30000,  18000,  4, 4,  5,  6 },
  { LEVEL_5,   311951360,   8912896,  8384,  60000,  36000,  6, 8,  6,  4 },
  { LEVEL_5_1, 588251136,   8912896,  8384,  120000, 46000,  8, 8,  10, 4 },
  // TODO(huisu): update max_cpb_size for level 5_2 ~ 6_2 when
  // they are finalized (currently tentative).
  { LEVEL_5_2, 1176502272,  8912896,  8384,  180000, 90000,  8, 8,  10, 4 },
  { LEVEL_6,   1176502272,  35651584, 16832, 180000, 90000,  8, 16, 10, 4 },
  { LEVEL_6_1, 2353004544u, 35651584, 16832, 240000, 180000, 8, 16, 10, 4 },
  { LEVEL_6_2, 4706009088u, 35651584, 16832, 480000, 360000, 8, 16, 10, 4 },
};
 489 /* clang-format on */
 490
// Human-readable explanations for why a target level cannot be met; the
// table has TARGET_LEVEL_FAIL_IDS entries.
// NOTE(review): presumably indexed by the level-fail id in the order listed
// here — confirm against the declaration of those ids.
static const char *level_fail_messages[TARGET_LEVEL_FAIL_IDS] = {
  "The average bit-rate is too high.",
  "The picture size is too large.",
  "The picture width/height is too large.",
  "The luma sample rate is too large.",
  "The CPB size is too large.",
  "The compression ratio is too small",
  "Too many column tiles are used.",
  "The alt-ref distance is too small.",
  "Too many reference buffers are used."
};
 502
 503 static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) {
 504   switch (mode) {
 505     case NORMAL:
 506       *hr = 1;
 507       *hs = 1;
 508       break;
 509     case FOURFIVE:
 510       *hr = 4;
 511       *hs = 5;
 512       break;
 513     case THREEFIVE:
 514       *hr = 3;
 515       *hs = 5;
 516       break;
 517     default:
 518       assert(mode == ONETWO);
 519       *hr = 1;
 520       *hs = 2;
 521       break;
 522   }
 523 }
 524
 525 // Mark all inactive blocks as active. Other segmentation features may be set
 526 // so memset cannot be used, instead only inactive blocks should be reset.
 527 static void suppress_active_map(VP9_COMP *cpi) {
 528   unsigned char *const seg_map = cpi->segmentation_map;
 529
 530   if (cpi->active_map.enabled || cpi->active_map.update) {
 531     const int rows = cpi->common.mi_rows;
 532     const int cols = cpi->common.mi_cols;
 533     int i;
 534
 535     for (i = 0; i < rows * cols; ++i)
 536       if (seg_map[i] == AM_SEGMENT_ID_INACTIVE)
 537         seg_map[i] = AM_SEGMENT_ID_ACTIVE;
 538   }
 539 }
 540
 541 static void apply_active_map(VP9_COMP *cpi) {
 542   struct segmentation *const seg = &cpi->common.seg;
 543   unsigned char *const seg_map = cpi->segmentation_map;
 544   const unsigned char *const active_map = cpi->active_map.map;
 545   int i;
 546
 547   assert(AM_SEGMENT_ID_ACTIVE == CR_SEGMENT_ID_BASE);
 548
 549   if (frame_is_intra_only(&cpi->common)) {
 550     cpi->active_map.enabled = 0;
 551     cpi->active_map.update = 1;
 552   }
 553
 554   if (cpi->active_map.update) {
 555     if (cpi->active_map.enabled) {
 556       for (i = 0; i < cpi->common.mi_rows * cpi->common.mi_cols; ++i)
 557         if (seg_map[i] == AM_SEGMENT_ID_ACTIVE) seg_map[i] = active_map[i];
 558       vp9_enable_segmentation(seg);
 559       vp9_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
 560       vp9_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF);
 561       // Setting the data to -MAX_LOOP_FILTER will result in the computed loop
 562       // filter level being zero regardless of the value of seg->abs_delta.
 563       vp9_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF,
 564                       -MAX_LOOP_FILTER);
 565     } else {
 566       vp9_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
 567       vp9_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF);
 568       if (seg->enabled) {
 569         seg->update_data = 1;
 570         seg->update_map = 1;
 571       }
 572     }
 573     cpi->active_map.update = 0;
 574   }
 575 }
 576
 577 static void apply_roi_map(VP9_COMP *cpi) {
 578   VP9_COMMON *cm = &cpi->common;
 579   struct segmentation *const seg = &cm->seg;
 580   vpx_roi_map_t *roi = &cpi->roi;
 581   const int *delta_q = roi->delta_q;
 582   const int *delta_lf = roi->delta_lf;
 583   const int *skip = roi->skip;
 584   int ref_frame[8];
 585   int internal_delta_q[MAX_SEGMENTS];
 586   int i;
 587   static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
 588                                     VP9_ALT_FLAG };
 589
 590   // TODO(jianj): Investigate why ROI not working in speed < 5 or in non
 591   // realtime mode.
 592   if (cpi->oxcf.mode != REALTIME || cpi->oxcf.speed < 5) return;
 593   if (!roi->enabled) return;
 594
 595   memcpy(&ref_frame, roi->ref_frame, sizeof(ref_frame));
 596
 597   vp9_enable_segmentation(seg);
 598   vp9_clearall_segfeatures(seg);
 599   // Select delta coding method;
 600   seg->abs_delta = SEGMENT_DELTADATA;
 601
 602   memcpy(cpi->segmentation_map, roi->roi_map, (cm->mi_rows * cm->mi_cols));
 603
 604   for (i = 0; i < MAX_SEGMENTS; ++i) {
 605     // Translate the external delta q values to internal values.
 606     internal_delta_q[i] = vp9_quantizer_to_qindex(abs(delta_q[i]));
 607     if (delta_q[i] < 0) internal_delta_q[i] = -internal_delta_q[i];
 608     vp9_disable_segfeature(seg, i, SEG_LVL_ALT_Q);
 609     vp9_disable_segfeature(seg, i, SEG_LVL_ALT_LF);
 610     if (internal_delta_q[i] != 0) {
 611       vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q);
 612       vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, internal_delta_q[i]);
 613     }
 614     if (delta_lf[i] != 0) {
 615       vp9_enable_segfeature(seg, i, SEG_LVL_ALT_LF);
 616       vp9_set_segdata(seg, i, SEG_LVL_ALT_LF, delta_lf[i]);
 617     }
 618     if (skip[i] != 0) {
 619       vp9_enable_segfeature(seg, i, SEG_LVL_SKIP);
 620       vp9_set_segdata(seg, i, SEG_LVL_SKIP, skip[i]);
 621     }
 622     if (ref_frame[i] >= 0) {
 623       int valid_ref = 1;
 624       // ALTREF is not used as reference for nonrd_pickmode with 0 lag.
 625       if (ref_frame[i] == ALTREF_FRAME && cpi->sf.use_nonrd_pick_mode)
 626         valid_ref = 0;
 627       // If GOLDEN is selected, make sure it's set as reference.
 628       if (ref_frame[i] == GOLDEN_FRAME &&
 629           !(cpi->ref_frame_flags & flag_list[ref_frame[i]])) {
 630         valid_ref = 0;
 631       }
 632       // GOLDEN was updated in previous encoded frame, so GOLDEN and LAST are
 633       // same reference.
 634       if (ref_frame[i] == GOLDEN_FRAME && cpi->rc.frames_since_golden == 0)
 635         ref_frame[i] = LAST_FRAME;
 636       if (valid_ref) {
 637         vp9_enable_segfeature(seg, i, SEG_LVL_REF_FRAME);
 638         vp9_set_segdata(seg, i, SEG_LVL_REF_FRAME, ref_frame[i]);
 639       }
 640     }
 641   }
 642   roi->enabled = 1;
 643 }
 644
 645 static void init_level_info(Vp9LevelInfo *level_info) {
 646   Vp9LevelStats *const level_stats = &level_info->level_stats;
 647   Vp9LevelSpec *const level_spec = &level_info->level_spec;
 648
 649   memset(level_stats, 0, sizeof(*level_stats));
 650   memset(level_spec, 0, sizeof(*level_spec));
 651   level_spec->level = LEVEL_UNKNOWN;
 652   level_spec->min_altref_distance = INT_MAX;
 653 }
 654
 655 static int check_seg_range(int seg_data[8], int range) {
 656   return !(abs(seg_data[0]) > range || abs(seg_data[1]) > range ||
 657            abs(seg_data[2]) > range || abs(seg_data[3]) > range ||
 658            abs(seg_data[4]) > range || abs(seg_data[5]) > range ||
 659            abs(seg_data[6]) > range || abs(seg_data[7]) > range);
 660 }
 661
 662 VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
 663   int i;
 664   const Vp9LevelSpec *this_level;
 665
 666   vpx_clear_system_state();
 667
 668   for (i = 0; i < VP9_LEVELS; ++i) {
 669     this_level = &vp9_level_defs[i];
 670     if ((double)level_spec->max_luma_sample_rate >
 671             (double)this_level->max_luma_sample_rate *
 672                 (1 + SAMPLE_RATE_GRACE_P) ||
 673         level_spec->max_luma_picture_size > this_level->max_luma_picture_size ||
 674         level_spec->max_luma_picture_breadth >
 675             this_level->max_luma_picture_breadth ||
 676         level_spec->average_bitrate > this_level->average_bitrate ||
 677         level_spec->max_cpb_size > this_level->max_cpb_size ||
 678         level_spec->compression_ratio < this_level->compression_ratio ||
 679         level_spec->max_col_tiles > this_level->max_col_tiles ||
 680         level_spec->min_altref_distance < this_level->min_altref_distance ||
 681         level_spec->max_ref_frame_buffers > this_level->max_ref_frame_buffers)
 682       continue;
 683     break;
 684   }
 685   return (i == VP9_LEVELS) ? LEVEL_UNKNOWN : vp9_level_defs[i].level;
 686 }
 687
 688 int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
 689                     unsigned int cols, int delta_q[8], int delta_lf[8],
 690                     int skip[8], int ref_frame[8]) {
 691   VP9_COMMON *cm = &cpi->common;
 692   vpx_roi_map_t *roi = &cpi->roi;
 693   const int range = 63;
 694   const int ref_frame_range = 3;  // Alt-ref
 695   const int skip_range = 1;
 696   const int frame_rows = cpi->common.mi_rows;
 697   const int frame_cols = cpi->common.mi_cols;
 698
 699   // Check number of rows and columns match
 700   if (frame_rows != (int)rows || frame_cols != (int)cols) {
 701     return -1;
 702   }
 703
 704   if (!check_seg_range(delta_q, range) || !check_seg_range(delta_lf, range) ||
 705       !check_seg_range(ref_frame, ref_frame_range) ||
 706       !check_seg_range(skip, skip_range))
 707     return -1;
 708
 709   // Also disable segmentation if no deltas are specified.
 710   if (!map ||
 711       (!(delta_q[0] | delta_q[1] | delta_q[2] | delta_q[3] | delta_q[4] |
 712          delta_q[5] | delta_q[6] | delta_q[7] | delta_lf[0] | delta_lf[1] |
 713          delta_lf[2] | delta_lf[3] | delta_lf[4] | delta_lf[5] | delta_lf[6] |
 714          delta_lf[7] | skip[0] | skip[1] | skip[2] | skip[3] | skip[4] |
 715          skip[5] | skip[6] | skip[7]) &&
 716        (ref_frame[0] == -1 && ref_frame[1] == -1 && ref_frame[2] == -1 &&
 717         ref_frame[3] == -1 && ref_frame[4] == -1 && ref_frame[5] == -1 &&
 718         ref_frame[6] == -1 && ref_frame[7] == -1))) {
 719     vp9_disable_segmentation(&cm->seg);
 720     cpi->roi.enabled = 0;
 721     return 0;
 722   }
 723
 724   if (roi->roi_map) {
 725     vpx_free(roi->roi_map);
 726     roi->roi_map = NULL;
 727   }
 728   CHECK_MEM_ERROR(cm, roi->roi_map, vpx_malloc(rows * cols));
 729
 730   // Copy to ROI sturcture in the compressor.
 731   memcpy(roi->roi_map, map, rows * cols);
 732   memcpy(&roi->delta_q, delta_q, MAX_SEGMENTS * sizeof(delta_q[0]));
 733   memcpy(&roi->delta_lf, delta_lf, MAX_SEGMENTS * sizeof(delta_lf[0]));
 734   memcpy(&roi->skip, skip, MAX_SEGMENTS * sizeof(skip[0]));
 735   memcpy(&roi->ref_frame, ref_frame, MAX_SEGMENTS * sizeof(ref_frame[0]));
 736   roi->enabled = 1;
 737   roi->rows = rows;
 738   roi->cols = cols;
 739
 740   return 0;
 741 }
 742
 743 int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
 744                        int cols) {
 745   if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) {
 746     unsigned char *const active_map_8x8 = cpi->active_map.map;
 747     const int mi_rows = cpi->common.mi_rows;
 748     const int mi_cols = cpi->common.mi_cols;
 749     cpi->active_map.update = 1;
 750     if (new_map_16x16) {
 751       int r, c;
 752       for (r = 0; r < mi_rows; ++r) {
 753         for (c = 0; c < mi_cols; ++c) {
 754           active_map_8x8[r * mi_cols + c] =
 755               new_map_16x16[(r >> 1) * cols + (c >> 1)]
 756                   ? AM_SEGMENT_ID_ACTIVE
 757                   : AM_SEGMENT_ID_INACTIVE;
 758         }
 759       }
 760       cpi->active_map.enabled = 1;
 761     } else {
 762       cpi->active_map.enabled = 0;
 763     }
 764     return 0;
 765   } else {
 766     return -1;
 767   }
 768 }
 769
 770 int vp9_get_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
 771                        int cols) {
 772   if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols &&
 773       new_map_16x16) {
 774     unsigned char *const seg_map_8x8 = cpi->segmentation_map;
 775     const int mi_rows = cpi->common.mi_rows;
 776     const int mi_cols = cpi->common.mi_cols;
 777     memset(new_map_16x16, !cpi->active_map.enabled, rows * cols);
 778     if (cpi->active_map.enabled) {
 779       int r, c;
 780       for (r = 0; r < mi_rows; ++r) {
 781         for (c = 0; c < mi_cols; ++c) {
 782           // Cyclic refresh segments are considered active despite not having
 783           // AM_SEGMENT_ID_ACTIVE
 784           new_map_16x16[(r >> 1) * cols + (c >> 1)] |=
 785               seg_map_8x8[r * mi_cols + c] != AM_SEGMENT_ID_INACTIVE;
 786         }
 787       }
 788     }
 789     return 0;
 790   } else {
 791     return -1;
 792   }
 793 }
 794
 795 void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) {
 796   MACROBLOCK *const mb = &cpi->td.mb;
 797   cpi->common.allow_high_precision_mv = allow_high_precision_mv;
 798   if (cpi->common.allow_high_precision_mv) {
 799     mb->mvcost = mb->nmvcost_hp;
 800     mb->mvsadcost = mb->nmvsadcost_hp;
 801   } else {
 802     mb->mvcost = mb->nmvcost;
 803     mb->mvsadcost = mb->nmvsadcost;
 804   }
 805 }
 806
// Prepares the entropy-context state for the frame about to be coded:
// chooses cm->frame_context_idx, forces both reference refresh flags on key
// frames, loads the selected saved context into *cm->fc for non-key frames,
// and clears cpi->interp_filter_selected (all of it on key frames, only
// entry 0 otherwise).
static void setup_frame(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  // Set up entropy context depending on frame type. The decoder mandates
  // the use of the default context, index 0, for keyframes and inter
  // frames where the error_resilient_mode or intra_only flag is set. For
  // other inter-frames the encoder currently uses only two contexts;
  // context 1 for ALTREF frames and context 0 for the others.
  if (frame_is_intra_only(cm) || cm->error_resilient_mode) {
    vp9_setup_past_independence(cm);
  } else {
    // With SVC enabled, frame_context_idx is not modified here.
    if (!cpi->use_svc) cm->frame_context_idx = cpi->refresh_alt_ref_frame;
  }

  // TODO(jingning): Overwrite the frame_context_idx index in multi-layer ARF
  // case. Need some further investigation on if we could apply this to single
  // layer ARF case as well.
  if (cpi->multi_layer_arf && !cpi->use_svc) {
    GF_GROUP *const gf_group = &cpi->twopass.gf_group;
    const int gf_group_index = gf_group->index;
    // A frame counts as boosted when it refreshes the golden or alt-ref
    // buffer and is not itself the source frame of an alt-ref.
    const int boost_frame =
        !cpi->rc.is_src_frame_alt_ref &&
        (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);

    // frame_context_idx           Frame Type
    //        0              Intra only frame, base layer ARF
    //        1              ARFs with layer depth = 2,3
    //        2              ARFs with layer depth > 3
    //        3              Non-boosted frames
    if (frame_is_intra_only(cm)) {
      cm->frame_context_idx = 0;
    } else if (boost_frame) {
      if (gf_group->rf_level[gf_group_index] == GF_ARF_STD)
        cm->frame_context_idx = 0;
      else if (gf_group->layer_depth[gf_group_index] <= 3)
        cm->frame_context_idx = 1;
      else
        cm->frame_context_idx = 2;
    } else {
      cm->frame_context_idx = 3;
    }
  }

  if (cm->frame_type == KEY_FRAME) {
    // Key frames refresh both the golden and the alt-ref buffers.
    cpi->refresh_golden_frame = 1;
    cpi->refresh_alt_ref_frame = 1;
    vp9_zero(cpi->interp_filter_selected);
  } else {
    // Start from the saved context selected above.
    *cm->fc = cm->frame_contexts[cm->frame_context_idx];
    vp9_zero(cpi->interp_filter_selected[0]);
  }
}
 858
 859 static void vp9_enc_setup_mi(VP9_COMMON *cm) {
 860   int i;
 861   cm->mi = cm->mip + cm->mi_stride + 1;
 862   memset(cm->mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip));
 863   cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;
 864   // Clear top border row
 865   memset(cm->prev_mip, 0, sizeof(*cm->prev_mip) * cm->mi_stride);
 866   // Clear left border column
 867   for (i = 1; i < cm->mi_rows + 1; ++i)
 868     memset(&cm->prev_mip[i * cm->mi_stride], 0, sizeof(*cm->prev_mip));
 869
 870   cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1;
 871   cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1;
 872
 873   memset(cm->mi_grid_base, 0,
 874          cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base));
 875 }
 876
 877 static int vp9_enc_alloc_mi(VP9_COMMON *cm, int mi_size) {
 878   cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip));
 879   if (!cm->mip) return 1;
 880   cm->prev_mip = vpx_calloc(mi_size, sizeof(*cm->prev_mip));
 881   if (!cm->prev_mip) return 1;
 882   cm->mi_alloc_size = mi_size;
 883
 884   cm->mi_grid_base = (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO *));
 885   if (!cm->mi_grid_base) return 1;
 886   cm->prev_mi_grid_base =
 887       (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO *));
 888   if (!cm->prev_mi_grid_base) return 1;
 889
 890   return 0;
 891 }
 892
 893 static void vp9_enc_free_mi(VP9_COMMON *cm) {
 894   vpx_free(cm->mip);
 895   cm->mip = NULL;
 896   vpx_free(cm->prev_mip);
 897   cm->prev_mip = NULL;
 898   vpx_free(cm->mi_grid_base);
 899   cm->mi_grid_base = NULL;
 900   vpx_free(cm->prev_mi_grid_base);
 901   cm->prev_mi_grid_base = NULL;
 902   cm->mi_alloc_size = 0;
 903 }
 904
 905 static void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) {
 906   // Current mip will be the prev_mip for the next frame.
 907   MODE_INFO **temp_base = cm->prev_mi_grid_base;
 908   MODE_INFO *temp = cm->prev_mip;
 909
 910   // Skip update prev_mi frame in show_existing_frame mode.
 911   if (cm->show_existing_frame) return;
 912
 913   cm->prev_mip = cm->mip;
 914   cm->mip = temp;
 915
 916   // Update the upper left visible macroblock ptrs.
 917   cm->mi = cm->mip + cm->mi_stride + 1;
 918   cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;
 919
 920   cm->prev_mi_grid_base = cm->mi_grid_base;
 921   cm->mi_grid_base = temp_base;
 922   cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1;
 923   cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1;
 924 }
 925
 926 void vp9_initialize_enc(void) {
 927   static volatile int init_done = 0;
 928
 929   if (!init_done) {
 930     vp9_rtcd();
 931     vpx_dsp_rtcd();
 932     vpx_scale_rtcd();
 933     vp9_init_intra_predictors();
 934     vp9_init_me_luts();
 935     vp9_rc_init_minq_luts();
 936     vp9_entropy_mv_init();
 937 #if !CONFIG_REALTIME_ONLY
 938     vp9_temporal_filter_init();
 939 #endif
 940     init_done = 1;
 941   }
 942 }
 943
 944 static void dealloc_compressor_data(VP9_COMP *cpi) {
 945   VP9_COMMON *const cm = &cpi->common;
 946   int i;
 947
 948   vpx_free(cpi->mbmi_ext_base);
 949   cpi->mbmi_ext_base = NULL;
 950
 951   vpx_free(cpi->tile_data);
 952   cpi->tile_data = NULL;
 953
 954   vpx_free(cpi->segmentation_map);
 955   cpi->segmentation_map = NULL;
 956   vpx_free(cpi->coding_context.last_frame_seg_map_copy);
 957   cpi->coding_context.last_frame_seg_map_copy = NULL;
 958
 959   vpx_free(cpi->nmvcosts[0]);
 960   vpx_free(cpi->nmvcosts[1]);
 961   cpi->nmvcosts[0] = NULL;
 962   cpi->nmvcosts[1] = NULL;
 963
 964   vpx_free(cpi->nmvcosts_hp[0]);
 965   vpx_free(cpi->nmvcosts_hp[1]);
 966   cpi->nmvcosts_hp[0] = NULL;
 967   cpi->nmvcosts_hp[1] = NULL;
 968
 969   vpx_free(cpi->nmvsadcosts[0]);
 970   vpx_free(cpi->nmvsadcosts[1]);
 971   cpi->nmvsadcosts[0] = NULL;
 972   cpi->nmvsadcosts[1] = NULL;
 973
 974   vpx_free(cpi->nmvsadcosts_hp[0]);
 975   vpx_free(cpi->nmvsadcosts_hp[1]);
 976   cpi->nmvsadcosts_hp[0] = NULL;
 977   cpi->nmvsadcosts_hp[1] = NULL;
 978
 979   vpx_free(cpi->skin_map);
 980   cpi->skin_map = NULL;
 981
 982   vpx_free(cpi->prev_partition);
 983   cpi->prev_partition = NULL;
 984
 985   vpx_free(cpi->svc.prev_partition_svc);
 986   cpi->svc.prev_partition_svc = NULL;
 987
 988   vpx_free(cpi->prev_segment_id);
 989   cpi->prev_segment_id = NULL;
 990
 991   vpx_free(cpi->prev_variance_low);
 992   cpi->prev_variance_low = NULL;
 993
 994   vpx_free(cpi->copied_frame_cnt);
 995   cpi->copied_frame_cnt = NULL;
 996
 997   vpx_free(cpi->content_state_sb_fd);
 998   cpi->content_state_sb_fd = NULL;
 999
1000   vpx_free(cpi->count_arf_frame_usage);
1001   cpi->count_arf_frame_usage = NULL;
1002   vpx_free(cpi->count_lastgolden_frame_usage);
1003   cpi->count_lastgolden_frame_usage = NULL;
1004
1005   vp9_cyclic_refresh_free(cpi->cyclic_refresh);
1006   cpi->cyclic_refresh = NULL;
1007
1008   vpx_free(cpi->active_map.map);
1009   cpi->active_map.map = NULL;
1010
1011   vpx_free(cpi->roi.roi_map);
1012   cpi->roi.roi_map = NULL;
1013
1014   vpx_free(cpi->consec_zero_mv);
1015   cpi->consec_zero_mv = NULL;
1016
1017   vpx_free(cpi->mb_wiener_variance);
1018   cpi->mb_wiener_variance = NULL;
1019
1020   vpx_free(cpi->mi_ssim_rdmult_scaling_factors);
1021   cpi->mi_ssim_rdmult_scaling_factors = NULL;
1022
1023   vp9_free_ref_frame_buffers(cm->buffer_pool);
1024 #if CONFIG_VP9_POSTPROC
1025   vp9_free_postproc_buffers(cm);
1026 #endif
1027   vp9_free_context_buffers(cm);
1028
1029   vpx_free_frame_buffer(&cpi->last_frame_uf);
1030   vpx_free_frame_buffer(&cpi->scaled_source);
1031   vpx_free_frame_buffer(&cpi->scaled_last_source);
1032   vpx_free_frame_buffer(&cpi->alt_ref_buffer);
1033 #ifdef ENABLE_KF_DENOISE
1034   vpx_free_frame_buffer(&cpi->raw_unscaled_source);
1035   vpx_free_frame_buffer(&cpi->raw_scaled_source);
1036 #endif
1037
1038   vp9_lookahead_destroy(cpi->lookahead);
1039
1040   vpx_free(cpi->tile_tok[0][0]);
1041   cpi->tile_tok[0][0] = 0;
1042
1043   vpx_free(cpi->tplist[0][0]);
1044   cpi->tplist[0][0] = NULL;
1045
1046   vp9_free_pc_tree(&cpi->td);
1047
1048   for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
1049     LAYER_CONTEXT *const lc = &cpi->svc.layer_context[i];
1050     vpx_free(lc->rc_twopass_stats_in.buf);
1051     lc->rc_twopass_stats_in.buf = NULL;
1052     lc->rc_twopass_stats_in.sz = 0;
1053   }
1054
1055   if (cpi->source_diff_var != NULL) {
1056     vpx_free(cpi->source_diff_var);
1057     cpi->source_diff_var = NULL;
1058   }
1059
1060   for (i = 0; i < MAX_LAG_BUFFERS; ++i) {
1061     vpx_free_frame_buffer(&cpi->svc.scaled_frames[i]);
1062   }
1063   memset(&cpi->svc.scaled_frames[0], 0,
1064          MAX_LAG_BUFFERS * sizeof(cpi->svc.scaled_frames[0]));
1065
1066   vpx_free_frame_buffer(&cpi->svc.scaled_temp);
1067   memset(&cpi->svc.scaled_temp, 0, sizeof(cpi->svc.scaled_temp));
1068
1069   vpx_free_frame_buffer(&cpi->svc.empty_frame.img);
1070   memset(&cpi->svc.empty_frame, 0, sizeof(cpi->svc.empty_frame));
1071
1072   vp9_free_svc_cyclic_refresh(cpi);
1073 }
1074
1075 static void save_coding_context(VP9_COMP *cpi) {
1076   CODING_CONTEXT *const cc = &cpi->coding_context;
1077   VP9_COMMON *cm = &cpi->common;
1078
1079   // Stores a snapshot of key state variables which can subsequently be
1080   // restored with a call to vp9_restore_coding_context. These functions are
1081   // intended for use in a re-code loop in vp9_compress_frame where the
1082   // quantizer value is adjusted between loop iterations.
1083   vp9_copy(cc->nmvjointcost, cpi->td.mb.nmvjointcost);
1084
1085   memcpy(cc->nmvcosts[0], cpi->nmvcosts[0],
1086          MV_VALS * sizeof(*cpi->nmvcosts[0]));
1087   memcpy(cc->nmvcosts[1], cpi->nmvcosts[1],
1088          MV_VALS * sizeof(*cpi->nmvcosts[1]));
1089   memcpy(cc->nmvcosts_hp[0], cpi->nmvcosts_hp[0],
1090          MV_VALS * sizeof(*cpi->nmvcosts_hp[0]));
1091   memcpy(cc->nmvcosts_hp[1], cpi->nmvcosts_hp[1],
1092          MV_VALS * sizeof(*cpi->nmvcosts_hp[1]));
1093
1094   vp9_copy(cc->segment_pred_probs, cm->seg.pred_probs);
1095
1096   memcpy(cpi->coding_context.last_frame_seg_map_copy, cm->last_frame_seg_map,
1097          (cm->mi_rows * cm->mi_cols));
1098
1099   vp9_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas);
1100   vp9_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas);
1101
1102   cc->fc = *cm->fc;
1103 }
1104
1105 static void restore_coding_context(VP9_COMP *cpi) {
1106   CODING_CONTEXT *const cc = &cpi->coding_context;
1107   VP9_COMMON *cm = &cpi->common;
1108
1109   // Restore key state variables to the snapshot state stored in the
1110   // previous call to vp9_save_coding_context.
1111   vp9_copy(cpi->td.mb.nmvjointcost, cc->nmvjointcost);
1112
1113   memcpy(cpi->nmvcosts[0], cc->nmvcosts[0], MV_VALS * sizeof(*cc->nmvcosts[0]));
1114   memcpy(cpi->nmvcosts[1], cc->nmvcosts[1], MV_VALS * sizeof(*cc->nmvcosts[1]));
1115   memcpy(cpi->nmvcosts_hp[0], cc->nmvcosts_hp[0],
1116          MV_VALS * sizeof(*cc->nmvcosts_hp[0]));
1117   memcpy(cpi->nmvcosts_hp[1], cc->nmvcosts_hp[1],
1118          MV_VALS * sizeof(*cc->nmvcosts_hp[1]));
1119
1120   vp9_copy(cm->seg.pred_probs, cc->segment_pred_probs);
1121
1122   memcpy(cm->last_frame_seg_map, cpi->coding_context.last_frame_seg_map_copy,
1123          (cm->mi_rows * cm->mi_cols));
1124
1125   vp9_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas);
1126   vp9_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas);
1127
1128   *cm->fc = cc->fc;
1129 }
1130
#if !CONFIG_REALTIME_ONLY
// Configures segmentation for the current frame: the update flags and, for
// segment 1 (and in one case segment 0), the Q / loop-filter deltas and the
// reference-frame and skip features. The setup depends on whether the frame
// is a key frame, an alt-ref refresh, or another frame coded while
// segmentation is enabled; a frame matching none of these is left untouched.
static void configure_static_seg_features(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const RATE_CONTROL *const rc = &cpi->rc;
  struct segmentation *const seg = &cm->seg;

  // An average Q above 48.0 is treated as "high Q" below.
  int high_q = (int)(rc->avg_q > 48.0);
  int qi_delta;

  // Disable and clear down for KF
  if (cm->frame_type == KEY_FRAME) {
    // Clear down the global segmentation map
    memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
    seg->update_map = 0;
    seg->update_data = 0;
    cpi->static_mb_pct = 0;

    // Disable segmentation
    vp9_disable_segmentation(seg);

    // Clear down the segment features.
    vp9_clearall_segfeatures(seg);
  } else if (cpi->refresh_alt_ref_frame) {
    // If this is an alt ref frame
    // Clear down the global segmentation map
    memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
    seg->update_map = 0;
    seg->update_data = 0;
    cpi->static_mb_pct = 0;

    // Disable segmentation and individual segment features by default
    vp9_disable_segmentation(seg);
    vp9_clearall_segfeatures(seg);

    // Scan frames from current to arf frame.
    // This function re-enables segmentation if appropriate.
    vp9_update_mbgraph_stats(cpi);

    // If segmentation was enabled set those features needed for the
    // arf itself.
    if (seg->enabled) {
      seg->update_map = 1;
      seg->update_data = 1;

      // Segment 1 gets the Q delta computed for 0.875 * avg_q, lowered by a
      // further 2, and a loop-filter delta of -2.
      qi_delta =
          vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875, cm->bit_depth);
      vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2);
      vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);

      vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
      vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF);

      // Where relevant assume segment data is delta data
      seg->abs_delta = SEGMENT_DELTADATA;
    }
  } else if (seg->enabled) {
    // All other frames if segmentation has been enabled

    // First normal frame in a valid gf or alt ref group
    if (rc->frames_since_golden == 0) {
      // Set up segment features for normal frames in an arf group
      if (rc->source_alt_ref_active) {
        seg->update_map = 0;
        seg->update_data = 1;
        seg->abs_delta = SEGMENT_DELTADATA;

        // Segment 1 gets the Q delta computed for 1.125 * avg_q, raised by a
        // further 2, and a loop-filter delta of -2.
        qi_delta =
            vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 1.125, cm->bit_depth);
        vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2);
        vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);

        vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);
        vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF);

        // Segment coding disabled for compred testing
        if (high_q || (cpi->static_mb_pct == 100)) {
          vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
          vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);
          vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP);
        }
      } else {
        // Disable segmentation and clear down features if alt ref
        // is not active for this group

        vp9_disable_segmentation(seg);

        memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);

        seg->update_map = 0;
        seg->update_data = 0;

        vp9_clearall_segfeatures(seg);
      }
    } else if (rc->is_src_frame_alt_ref) {
      // Special case where we are coding over the top of a previous
      // alt ref frame.
      // Segment coding disabled for compred testing

      // Enable ref frame features for segment 0 as well
      vp9_enable_segfeature(seg, 0, SEG_LVL_REF_FRAME);
      vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);

      // All mbs should use ALTREF_FRAME
      vp9_clear_segdata(seg, 0, SEG_LVL_REF_FRAME);
      vp9_set_segdata(seg, 0, SEG_LVL_REF_FRAME, ALTREF_FRAME);
      vp9_clear_segdata(seg, 1, SEG_LVL_REF_FRAME);
      vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);

      // Skip all MBs if high Q (0,0 mv and skip coeffs)
      if (high_q) {
        vp9_enable_segfeature(seg, 0, SEG_LVL_SKIP);
        vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP);
      }
      // Enable data update
      seg->update_data = 1;
    } else {
      // All other frames.

      // No updates.. leave things as they are.
      seg->update_map = 0;
      seg->update_data = 0;
    }
  }
}
#endif  // !CONFIG_REALTIME_ONLY
1256
1257 static void update_reference_segmentation_map(VP9_COMP *cpi) {
1258   VP9_COMMON *const cm = &cpi->common;
1259   MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible;
1260   uint8_t *cache_ptr = cm->last_frame_seg_map;
1261   int row, col;
1262
1263   for (row = 0; row < cm->mi_rows; row++) {
1264     MODE_INFO **mi_8x8 = mi_8x8_ptr;
1265     uint8_t *cache = cache_ptr;
1266     for (col = 0; col < cm->mi_cols; col++, mi_8x8++, cache++)
1267       cache[0] = mi_8x8[0]->segment_id;
1268     mi_8x8_ptr += cm->mi_stride;
1269     cache_ptr += cm->mi_cols;
1270   }
1271 }
1272
1273 static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
1274   VP9_COMMON *cm = &cpi->common;
1275   const VP9EncoderConfig *oxcf = &cpi->oxcf;
1276
1277   if (!cpi->lookahead)
1278     cpi->lookahead = vp9_lookahead_init(oxcf->width, oxcf->height,
1279                                         cm->subsampling_x, cm->subsampling_y,
1280 #if CONFIG_VP9_HIGHBITDEPTH
1281                                         cm->use_highbitdepth,
1282 #endif
1283                                         oxcf->lag_in_frames);
1284   if (!cpi->lookahead)
1285     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1286                        "Failed to allocate lag buffers");
1287
1288   // TODO(agrange) Check if ARF is enabled and skip allocation if not.
1289   if (vpx_realloc_frame_buffer(&cpi->alt_ref_buffer, oxcf->width, oxcf->height,
1290                                cm->subsampling_x, cm->subsampling_y,
1291 #if CONFIG_VP9_HIGHBITDEPTH
1292                                cm->use_highbitdepth,
1293 #endif
1294                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
1295                                NULL, NULL, NULL))
1296     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
1297                        "Failed to allocate altref buffer");
1298 }
1299
// (Re)allocates the encoder's utility frame buffers at the current coded
// size cm->width x cm->height: last_frame_uf, scaled_source and
// scaled_last_source, plus the raw source copies when ENABLE_KF_DENOISE is
// defined. For one-pass CBR SVC with more than two spatial layers a
// half-size svc.scaled_temp buffer is also allocated, once. Every failure is
// reported through vpx_internal_error() with VPX_CODEC_MEM_ERROR.
static void alloc_util_frame_buffers(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  if (vpx_realloc_frame_buffer(&cpi->last_frame_uf, cm->width, cm->height,
                               cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                               cm->use_highbitdepth,
#endif
                               VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
                               NULL, NULL, NULL))
    vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                       "Failed to allocate last frame buffer");

  if (vpx_realloc_frame_buffer(&cpi->scaled_source, cm->width, cm->height,
                               cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                               cm->use_highbitdepth,
#endif
                               VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
                               NULL, NULL, NULL))
    vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                       "Failed to allocate scaled source buffer");

  // For 1 pass cbr: allocate scaled_frame that may be used as an intermediate
  // buffer for a 2 stage down-sampling: two stages of 1:2 down-sampling for a
  // target of 1/4x1/4. number_spatial_layers must be greater than 2.
  if (is_one_pass_cbr_svc(cpi) && !cpi->svc.scaled_temp_is_alloc &&
      cpi->svc.number_spatial_layers > 2) {
    // The flag is set before the allocation is attempted, so this branch is
    // entered at most once.
    cpi->svc.scaled_temp_is_alloc = 1;
    if (vpx_realloc_frame_buffer(
            &cpi->svc.scaled_temp, cm->width >> 1, cm->height >> 1,
            cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
            cm->use_highbitdepth,
#endif
            VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL))
      vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
                         "Failed to allocate scaled_frame for svc ");
  }

  if (vpx_realloc_frame_buffer(&cpi->scaled_last_source, cm->width, cm->height,
                               cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                               cm->use_highbitdepth,
#endif
                               VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
                               NULL, NULL, NULL))
    vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                       "Failed to allocate scaled last source buffer");
#ifdef ENABLE_KF_DENOISE
  // Unscaled and scaled copies of the raw source, only built when
  // ENABLE_KF_DENOISE is defined.
  if (vpx_realloc_frame_buffer(&cpi->raw_unscaled_source, cm->width, cm->height,
                               cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                               cm->use_highbitdepth,
#endif
                               VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
                               NULL, NULL, NULL))
    vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                       "Failed to allocate unscaled raw source frame buffer");

  if (vpx_realloc_frame_buffer(&cpi->raw_scaled_source, cm->width, cm->height,
                               cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                               cm->use_highbitdepth,
#endif
                               VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
                               NULL, NULL, NULL))
    vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                       "Failed to allocate scaled raw source frame buffer");
#endif
}
1370
1371 static int alloc_context_buffers_ext(VP9_COMP *cpi) {
1372   VP9_COMMON *cm = &cpi->common;
1373   int mi_size = cm->mi_cols * cm->mi_rows;
1374
1375   cpi->mbmi_ext_base = vpx_calloc(mi_size, sizeof(*cpi->mbmi_ext_base));
1376   if (!cpi->mbmi_ext_base) return 1;
1377
1378   return 0;
1379 }
1380
1381 static void alloc_compressor_data(VP9_COMP *cpi) {
1382   VP9_COMMON *cm = &cpi->common;
1383   int sb_rows;
1384
1385   vp9_alloc_context_buffers(cm, cm->width, cm->height);
1386
1387   alloc_context_buffers_ext(cpi);
1388
1389   vpx_free(cpi->tile_tok[0][0]);
1390
1391   {
1392     unsigned int tokens = get_token_alloc(cm->mb_rows, cm->mb_cols);
1393     CHECK_MEM_ERROR(cm, cpi->tile_tok[0][0],
1394                     vpx_calloc(tokens, sizeof(*cpi->tile_tok[0][0])));
1395   }
1396
1397   sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
1398   vpx_free(cpi->tplist[0][0]);
1399   CHECK_MEM_ERROR(
1400       cm, cpi->tplist[0][0],
1401       vpx_calloc(sb_rows * 4 * (1 << 6), sizeof(*cpi->tplist[0][0])));
1402
1403   vp9_setup_pc_tree(&cpi->common, &cpi->td);
1404 }
1405
1406 void vp9_new_framerate(VP9_COMP *cpi, double framerate) {
1407   cpi->framerate = framerate < 0.1 ? 30 : framerate;
1408   vp9_rc_update_framerate(cpi);
1409 }
1410
1411 static void set_tile_limits(VP9_COMP *cpi) {
1412   VP9_COMMON *const cm = &cpi->common;
1413
1414   int min_log2_tile_cols, max_log2_tile_cols;
1415   vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
1416
1417   cm->log2_tile_cols =
1418       clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
1419   cm->log2_tile_rows = cpi->oxcf.tile_rows;
1420
1421   if (cpi->oxcf.target_level == LEVEL_AUTO) {
1422     const int level_tile_cols =
1423         log_tile_cols_from_picsize_level(cpi->common.width, cpi->common.height);
1424     if (cm->log2_tile_cols > level_tile_cols) {
1425       cm->log2_tile_cols = VPXMAX(level_tile_cols, min_log2_tile_cols);
1426     }
1427   }
1428 }
1429
1430 static void update_frame_size(VP9_COMP *cpi) {
1431   VP9_COMMON *const cm = &cpi->common;
1432   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
1433
1434   vp9_set_mb_mi(cm, cm->width, cm->height);
1435   vp9_init_context_buffers(cm);
1436   vp9_init_macroblockd(cm, xd, NULL);
1437   cpi->td.mb.mbmi_ext_base = cpi->mbmi_ext_base;
1438   memset(cpi->mbmi_ext_base, 0,
1439          cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base));
1440
1441   set_tile_limits(cpi);
1442 }
1443
1444 static void init_buffer_indices(VP9_COMP *cpi) {
1445   int ref_frame;
1446
1447   for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
1448     cpi->ref_fb_idx[ref_frame] = ref_frame;
1449
1450   cpi->lst_fb_idx = cpi->ref_fb_idx[LAST_FRAME - 1];
1451   cpi->gld_fb_idx = cpi->ref_fb_idx[GOLDEN_FRAME - 1];
1452   cpi->alt_fb_idx = cpi->ref_fb_idx[ALTREF_FRAME - 1];
1453 }
1454
1455 static void init_level_constraint(LevelConstraint *lc) {
1456   lc->level_index = -1;
1457   lc->max_cpb_size = INT_MAX;
1458   lc->max_frame_size = INT_MAX;
1459   lc->rc_config_updated = 0;
1460   lc->fail_flag = 0;
1461 }
1462
1463 static void set_level_constraint(LevelConstraint *ls, int8_t level_index) {
1464   vpx_clear_system_state();
1465   ls->level_index = level_index;
1466   if (level_index >= 0) {
1467     ls->max_cpb_size = vp9_level_defs[level_index].max_cpb_size * (double)1000;
1468   }
1469 }
1470
// Performs the first-time configuration of a compressor instance from oxcf:
// copies the configuration and the stream properties into cpi/cm, sets the
// level constraint, allocates the size-dependent buffers, sets up the layer
// counts (and the layer context when layers are in use), then runs
// vp9_change_config() for everything shared with later reconfiguration.
static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) {
  VP9_COMMON *const cm = &cpi->common;

  cpi->oxcf = *oxcf;
  cpi->framerate = oxcf->init_framerate;
  cm->profile = oxcf->profile;
  cm->bit_depth = oxcf->bit_depth;
#if CONFIG_VP9_HIGHBITDEPTH
  cm->use_highbitdepth = oxcf->use_highbitdepth;
#endif
  cm->color_space = oxcf->color_space;
  cm->color_range = oxcf->color_range;

  cpi->target_level = oxcf->target_level;
  // Level statistics are kept for every target level except LEVEL_MAX.
  cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX;
  set_level_constraint(&cpi->level_constraint,
                       get_level_index(cpi->target_level));

  // The frame size must be in place before the buffers are allocated.
  cm->width = oxcf->width;
  cm->height = oxcf->height;
  alloc_compressor_data(cpi);

  cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode;

  // Single thread case: use counts in common.
  cpi->td.counts = &cm->counts;

  // Spatial scalability.
  cpi->svc.number_spatial_layers = oxcf->ss_number_layers;
  // Temporal scalability.
  cpi->svc.number_temporal_layers = oxcf->ts_number_layers;

  // The layer context is initialised for CBR with several temporal layers,
  // or for any multi-layer configuration when pass != 1.
  if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||
      ((cpi->svc.number_temporal_layers > 1 ||
        cpi->svc.number_spatial_layers > 1) &&
       cpi->oxcf.pass != 1)) {
    vp9_init_layer_context(cpi);
  }

  // change includes all joint functionality
  vp9_change_config(cpi, oxcf);

  cpi->static_mb_pct = 0;
  cpi->ref_frame_flags = 0;

  init_buffer_indices(cpi);

  vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
}
1520
1521 static void set_rc_buffer_sizes(RATE_CONTROL *rc,
1522                                 const VP9EncoderConfig *oxcf) {
1523   const int64_t bandwidth = oxcf->target_bandwidth;
1524   const int64_t starting = oxcf->starting_buffer_level_ms;
1525   const int64_t optimal = oxcf->optimal_buffer_level_ms;
1526   const int64_t maximum = oxcf->maximum_buffer_size_ms;
1527
1528   rc->starting_buffer_level = starting * bandwidth / 1000;
1529   rc->optimal_buffer_level =
1530       (optimal == 0) ? bandwidth / 8 : optimal * bandwidth / 1000;
1531   rc->maximum_buffer_size =
1532       (maximum == 0) ? bandwidth / 8 : maximum * bandwidth / 1000;
1533 }
1534
1535 #if CONFIG_VP9_HIGHBITDEPTH
// Fills the cpi->fn_ptr[BT] entry for block type BT with the given
// function pointers (sdf, sdaf, vf, svf, svaf, sdx4df) and sets sdx8f to
// NULL. The expansion is a plain sequence of statements ending in ';' and
// refers to a variable named cpi at the point of use.
// TODO(angiebird): make sdx8f available for highbitdepth if needed
#define HIGHBD_BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF) \
  cpi->fn_ptr[BT].sdf = SDF;                             \
  cpi->fn_ptr[BT].sdaf = SDAF;                           \
  cpi->fn_ptr[BT].vf = VF;                               \
  cpi->fn_ptr[BT].svf = SVF;                             \
  cpi->fn_ptr[BT].svaf = SVAF;                           \
  cpi->fn_ptr[BT].sdx4df = SDX4DF;                       \
  cpi->fn_ptr[BT].sdx8f = NULL;
1545
// Defines three static wrappers around the SAD function fnname:
// fnname_bits8 returns its result unchanged, fnname_bits10 shifts the result
// right by 2 and fnname_bits12 shifts it right by 4.
// NOTE(review): the shifts presumably bring 10/12-bit SADs back to an 8-bit
// scale -- confirm.
#define MAKE_BFP_SAD_WRAPPER(fnname)                                           \
  static unsigned int fnname##_bits8(const uint8_t *src_ptr,                   \
                                     int source_stride,                        \
                                     const uint8_t *ref_ptr, int ref_stride) { \
    return fnname(src_ptr, source_stride, ref_ptr, ref_stride);                \
  }                                                                            \
  static unsigned int fnname##_bits10(                                         \
      const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
      int ref_stride) {                                                        \
    return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 2;           \
  }                                                                            \
  static unsigned int fnname##_bits12(                                         \
      const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
      int ref_stride) {                                                        \
    return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 4;           \
  }
1562
// Same scheme as the plain SAD wrappers, for functions that take an extra
// second_pred argument: fnname_bits8 returns the result unchanged,
// fnname_bits10 shifts it right by 2 and fnname_bits12 shifts it right by 4.
#define MAKE_BFP_SADAVG_WRAPPER(fnname)                                        \
  static unsigned int fnname##_bits8(                                          \
      const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
      int ref_stride, const uint8_t *second_pred) {                            \
    return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred);   \
  }                                                                            \
  static unsigned int fnname##_bits10(                                         \
      const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
      int ref_stride, const uint8_t *second_pred) {                            \
    return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred) >> \
           2;                                                                  \
  }                                                                            \
  static unsigned int fnname##_bits12(                                         \
      const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr,       \
      int ref_stride, const uint8_t *second_pred) {                            \
    return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred) >> \
           4;                                                                  \
  }
1581
// Defines three static wrappers around fnname, which writes its results
// into sad_array: fnname_bits8 forwards the call unchanged, fnname_bits10
// then shifts each of the first four sad_array entries right by 2, and
// fnname_bits12 shifts them right by 4.
#define MAKE_BFP_SAD4D_WRAPPER(fnname)                                        \
  static void fnname##_bits8(const uint8_t *src_ptr, int source_stride,       \
                             const uint8_t *const ref_ptr[], int ref_stride,  \
                             unsigned int *sad_array) {                       \
    fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array);           \
  }                                                                           \
  static void fnname##_bits10(const uint8_t *src_ptr, int source_stride,      \
                              const uint8_t *const ref_ptr[], int ref_stride, \
                              unsigned int *sad_array) {                      \
    int i;                                                                    \
    fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array);           \
    for (i = 0; i < 4; i++) sad_array[i] >>= 2;                               \
  }                                                                           \
  static void fnname##_bits12(const uint8_t *src_ptr, int source_stride,      \
                              const uint8_t *const ref_ptr[], int ref_stride, \
                              unsigned int *sad_array) {                      \
    int i;                                                                    \
    fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array);           \
    for (i = 0; i < 4; i++) sad_array[i] >>= 4;                               \
  }
1602
1603 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x16)
1604 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x16_avg)
1605 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x16x4d)
1606 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x32)
1607 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x32_avg)
1608 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x32x4d)
1609 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x32)
1610 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x32_avg)
1611 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x32x4d)
1612 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x64)
1613 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x64_avg)
1614 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x64x4d)
1615 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x32)
1616 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x32_avg)
1617 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x32x4d)
1618 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x64)
1619 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x64_avg)
1620 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x64x4d)
1621 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x16)
1622 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x16_avg)
1623 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x16x4d)
1624 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x8)
1625 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x8_avg)
1626 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x8x4d)
1627 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x16)
1628 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x16_avg)
1629 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x16x4d)
1630 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x8)
1631 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x8_avg)
1632 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x8x4d)
1633 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x4)
1634 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x4_avg)
1635 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x4x4d)
1636 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x8)
1637 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x8_avg)
1638 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x8x4d)
1639 MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x4)
1640 MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x4_avg)
1641 MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x4x4d)
1642
1643 static void highbd_set_var_fns(VP9_COMP *const cpi) {
1644   VP9_COMMON *const cm = &cpi->common;
1645   if (cm->use_highbitdepth) {
1646     switch (cm->bit_depth) {
1647       case VPX_BITS_8:
1648         HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits8,
1649                    vpx_highbd_sad32x16_avg_bits8, vpx_highbd_8_variance32x16,
1650                    vpx_highbd_8_sub_pixel_variance32x16,
1651                    vpx_highbd_8_sub_pixel_avg_variance32x16,
1652                    vpx_highbd_sad32x16x4d_bits8)
1653
1654         HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits8,
1655                    vpx_highbd_sad16x32_avg_bits8, vpx_highbd_8_variance16x32,
1656                    vpx_highbd_8_sub_pixel_variance16x32,
1657                    vpx_highbd_8_sub_pixel_avg_variance16x32,
1658                    vpx_highbd_sad16x32x4d_bits8)
1659
1660         HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits8,
1661                    vpx_highbd_sad64x32_avg_bits8, vpx_highbd_8_variance64x32,
1662                    vpx_highbd_8_sub_pixel_variance64x32,
1663                    vpx_highbd_8_sub_pixel_avg_variance64x32,
1664                    vpx_highbd_sad64x32x4d_bits8)
1665
1666         HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits8,
1667                    vpx_highbd_sad32x64_avg_bits8, vpx_highbd_8_variance32x64,
1668                    vpx_highbd_8_sub_pixel_variance32x64,
1669                    vpx_highbd_8_sub_pixel_avg_variance32x64,
1670                    vpx_highbd_sad32x64x4d_bits8)
1671
1672         HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits8,
1673                    vpx_highbd_sad32x32_avg_bits8, vpx_highbd_8_variance32x32,
1674                    vpx_highbd_8_sub_pixel_variance32x32,
1675                    vpx_highbd_8_sub_pixel_avg_variance32x32,
1676                    vpx_highbd_sad32x32x4d_bits8)
1677
1678         HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits8,
1679                    vpx_highbd_sad64x64_avg_bits8, vpx_highbd_8_variance64x64,
1680                    vpx_highbd_8_sub_pixel_variance64x64,
1681                    vpx_highbd_8_sub_pixel_avg_variance64x64,
1682                    vpx_highbd_sad64x64x4d_bits8)
1683
1684         HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits8,
1685                    vpx_highbd_sad16x16_avg_bits8, vpx_highbd_8_variance16x16,
1686                    vpx_highbd_8_sub_pixel_variance16x16,
1687                    vpx_highbd_8_sub_pixel_avg_variance16x16,
1688                    vpx_highbd_sad16x16x4d_bits8)
1689
1690         HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits8,
1691                    vpx_highbd_sad16x8_avg_bits8, vpx_highbd_8_variance16x8,
1692                    vpx_highbd_8_sub_pixel_variance16x8,
1693                    vpx_highbd_8_sub_pixel_avg_variance16x8,
1694                    vpx_highbd_sad16x8x4d_bits8)
1695
1696         HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits8,
1697                    vpx_highbd_sad8x16_avg_bits8, vpx_highbd_8_variance8x16,
1698                    vpx_highbd_8_sub_pixel_variance8x16,
1699                    vpx_highbd_8_sub_pixel_avg_variance8x16,
1700                    vpx_highbd_sad8x16x4d_bits8)
1701
1702         HIGHBD_BFP(
1703             BLOCK_8X8, vpx_highbd_sad8x8_bits8, vpx_highbd_sad8x8_avg_bits8,
1704             vpx_highbd_8_variance8x8, vpx_highbd_8_sub_pixel_variance8x8,
1705             vpx_highbd_8_sub_pixel_avg_variance8x8, vpx_highbd_sad8x8x4d_bits8)
1706
1707         HIGHBD_BFP(
1708             BLOCK_8X4, vpx_highbd_sad8x4_bits8, vpx_highbd_sad8x4_avg_bits8,
1709             vpx_highbd_8_variance8x4, vpx_highbd_8_sub_pixel_variance8x4,
1710             vpx_highbd_8_sub_pixel_avg_variance8x4, vpx_highbd_sad8x4x4d_bits8)
1711
1712         HIGHBD_BFP(
1713             BLOCK_4X8, vpx_highbd_sad4x8_bits8, vpx_highbd_sad4x8_avg_bits8,
1714             vpx_highbd_8_variance4x8, vpx_highbd_8_sub_pixel_variance4x8,
1715             vpx_highbd_8_sub_pixel_avg_variance4x8, vpx_highbd_sad4x8x4d_bits8)
1716
1717         HIGHBD_BFP(
1718             BLOCK_4X4, vpx_highbd_sad4x4_bits8, vpx_highbd_sad4x4_avg_bits8,
1719             vpx_highbd_8_variance4x4, vpx_highbd_8_sub_pixel_variance4x4,
1720             vpx_highbd_8_sub_pixel_avg_variance4x4, vpx_highbd_sad4x4x4d_bits8)
1721         break;
1722
1723       case VPX_BITS_10:
1724         HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits10,
1725                    vpx_highbd_sad32x16_avg_bits10, vpx_highbd_10_variance32x16,
1726                    vpx_highbd_10_sub_pixel_variance32x16,
1727                    vpx_highbd_10_sub_pixel_avg_variance32x16,
1728                    vpx_highbd_sad32x16x4d_bits10)
1729
1730         HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits10,
1731                    vpx_highbd_sad16x32_avg_bits10, vpx_highbd_10_variance16x32,
1732                    vpx_highbd_10_sub_pixel_variance16x32,
1733                    vpx_highbd_10_sub_pixel_avg_variance16x32,
1734                    vpx_highbd_sad16x32x4d_bits10)
1735
1736         HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits10,
1737                    vpx_highbd_sad64x32_avg_bits10, vpx_highbd_10_variance64x32,
1738                    vpx_highbd_10_sub_pixel_variance64x32,
1739                    vpx_highbd_10_sub_pixel_avg_variance64x32,
1740                    vpx_highbd_sad64x32x4d_bits10)
1741
1742         HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits10,
1743                    vpx_highbd_sad32x64_avg_bits10, vpx_highbd_10_variance32x64,
1744                    vpx_highbd_10_sub_pixel_variance32x64,
1745                    vpx_highbd_10_sub_pixel_avg_variance32x64,
1746                    vpx_highbd_sad32x64x4d_bits10)
1747
1748         HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits10,
1749                    vpx_highbd_sad32x32_avg_bits10, vpx_highbd_10_variance32x32,
1750                    vpx_highbd_10_sub_pixel_variance32x32,
1751                    vpx_highbd_10_sub_pixel_avg_variance32x32,
1752                    vpx_highbd_sad32x32x4d_bits10)
1753
1754         HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits10,
1755                    vpx_highbd_sad64x64_avg_bits10, vpx_highbd_10_variance64x64,
1756                    vpx_highbd_10_sub_pixel_variance64x64,
1757                    vpx_highbd_10_sub_pixel_avg_variance64x64,
1758                    vpx_highbd_sad64x64x4d_bits10)
1759
1760         HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits10,
1761                    vpx_highbd_sad16x16_avg_bits10, vpx_highbd_10_variance16x16,
1762                    vpx_highbd_10_sub_pixel_variance16x16,
1763                    vpx_highbd_10_sub_pixel_avg_variance16x16,
1764                    vpx_highbd_sad16x16x4d_bits10)
1765
1766         HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits10,
1767                    vpx_highbd_sad16x8_avg_bits10, vpx_highbd_10_variance16x8,
1768                    vpx_highbd_10_sub_pixel_variance16x8,
1769                    vpx_highbd_10_sub_pixel_avg_variance16x8,
1770                    vpx_highbd_sad16x8x4d_bits10)
1771
1772         HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits10,
1773                    vpx_highbd_sad8x16_avg_bits10, vpx_highbd_10_variance8x16,
1774                    vpx_highbd_10_sub_pixel_variance8x16,
1775                    vpx_highbd_10_sub_pixel_avg_variance8x16,
1776                    vpx_highbd_sad8x16x4d_bits10)
1777
1778         HIGHBD_BFP(BLOCK_8X8, vpx_highbd_sad8x8_bits10,
1779                    vpx_highbd_sad8x8_avg_bits10, vpx_highbd_10_variance8x8,
1780                    vpx_highbd_10_sub_pixel_variance8x8,
1781                    vpx_highbd_10_sub_pixel_avg_variance8x8,
1782                    vpx_highbd_sad8x8x4d_bits10)
1783
1784         HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits10,
1785                    vpx_highbd_sad8x4_avg_bits10, vpx_highbd_10_variance8x4,
1786                    vpx_highbd_10_sub_pixel_variance8x4,
1787                    vpx_highbd_10_sub_pixel_avg_variance8x4,
1788                    vpx_highbd_sad8x4x4d_bits10)
1789
1790         HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits10,
1791                    vpx_highbd_sad4x8_avg_bits10, vpx_highbd_10_variance4x8,
1792                    vpx_highbd_10_sub_pixel_variance4x8,
1793                    vpx_highbd_10_sub_pixel_avg_variance4x8,
1794                    vpx_highbd_sad4x8x4d_bits10)
1795
1796         HIGHBD_BFP(BLOCK_4X4, vpx_highbd_sad4x4_bits10,
1797                    vpx_highbd_sad4x4_avg_bits10, vpx_highbd_10_variance4x4,
1798                    vpx_highbd_10_sub_pixel_variance4x4,
1799                    vpx_highbd_10_sub_pixel_avg_variance4x4,
1800                    vpx_highbd_sad4x4x4d_bits10)
1801         break;
1802
1803       default:
1804         assert(cm->bit_depth == VPX_BITS_12);
1805         HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits12,
1806                    vpx_highbd_sad32x16_avg_bits12, vpx_highbd_12_variance32x16,
1807                    vpx_highbd_12_sub_pixel_variance32x16,
1808                    vpx_highbd_12_sub_pixel_avg_variance32x16,
1809                    vpx_highbd_sad32x16x4d_bits12)
1810
1811         HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits12,
1812                    vpx_highbd_sad16x32_avg_bits12, vpx_highbd_12_variance16x32,
1813                    vpx_highbd_12_sub_pixel_variance16x32,
1814                    vpx_highbd_12_sub_pixel_avg_variance16x32,
1815                    vpx_highbd_sad16x32x4d_bits12)
1816
1817         HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits12,
1818                    vpx_highbd_sad64x32_avg_bits12, vpx_highbd_12_variance64x32,
1819                    vpx_highbd_12_sub_pixel_variance64x32,
1820                    vpx_highbd_12_sub_pixel_avg_variance64x32,
1821                    vpx_highbd_sad64x32x4d_bits12)
1822
1823         HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits12,
1824                    vpx_highbd_sad32x64_avg_bits12, vpx_highbd_12_variance32x64,
1825                    vpx_highbd_12_sub_pixel_variance32x64,
1826                    vpx_highbd_12_sub_pixel_avg_variance32x64,
1827                    vpx_highbd_sad32x64x4d_bits12)
1828
1829         HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits12,
1830                    vpx_highbd_sad32x32_avg_bits12, vpx_highbd_12_variance32x32,
1831                    vpx_highbd_12_sub_pixel_variance32x32,
1832                    vpx_highbd_12_sub_pixel_avg_variance32x32,
1833                    vpx_highbd_sad32x32x4d_bits12)
1834
1835         HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits12,
1836                    vpx_highbd_sad64x64_avg_bits12, vpx_highbd_12_variance64x64,
1837                    vpx_highbd_12_sub_pixel_variance64x64,
1838                    vpx_highbd_12_sub_pixel_avg_variance64x64,
1839                    vpx_highbd_sad64x64x4d_bits12)
1840
1841         HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits12,
1842                    vpx_highbd_sad16x16_avg_bits12, vpx_highbd_12_variance16x16,
1843                    vpx_highbd_12_sub_pixel_variance16x16,
1844                    vpx_highbd_12_sub_pixel_avg_variance16x16,
1845                    vpx_highbd_sad16x16x4d_bits12)
1846
1847         HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits12,
1848                    vpx_highbd_sad16x8_avg_bits12, vpx_highbd_12_variance16x8,
1849                    vpx_highbd_12_sub_pixel_variance16x8,
1850                    vpx_highbd_12_sub_pixel_avg_variance16x8,
1851                    vpx_highbd_sad16x8x4d_bits12)
1852
1853         HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits12,
1854                    vpx_highbd_sad8x16_avg_bits12, vpx_highbd_12_variance8x16,
1855                    vpx_highbd_12_sub_pixel_variance8x16,
1856                    vpx_highbd_12_sub_pixel_avg_variance8x16,
1857                    vpx_highbd_sad8x16x4d_bits12)
1858
1859         HIGHBD_BFP(BLOCK_8X8, vpx_highbd_sad8x8_bits12,
1860                    vpx_highbd_sad8x8_avg_bits12, vpx_highbd_12_variance8x8,
1861                    vpx_highbd_12_sub_pixel_variance8x8,
1862                    vpx_highbd_12_sub_pixel_avg_variance8x8,
1863                    vpx_highbd_sad8x8x4d_bits12)
1864
1865         HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits12,
1866                    vpx_highbd_sad8x4_avg_bits12, vpx_highbd_12_variance8x4,
1867                    vpx_highbd_12_sub_pixel_variance8x4,
1868                    vpx_highbd_12_sub_pixel_avg_variance8x4,
1869                    vpx_highbd_sad8x4x4d_bits12)
1870
1871         HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits12,
1872                    vpx_highbd_sad4x8_avg_bits12, vpx_highbd_12_variance4x8,
1873                    vpx_highbd_12_sub_pixel_variance4x8,
1874                    vpx_highbd_12_sub_pixel_avg_variance4x8,
1875                    vpx_highbd_sad4x8x4d_bits12)
1876
1877         HIGHBD_BFP(BLOCK_4X4, vpx_highbd_sad4x4_bits12,
1878                    vpx_highbd_sad4x4_avg_bits12, vpx_highbd_12_variance4x4,
1879                    vpx_highbd_12_sub_pixel_variance4x4,
1880                    vpx_highbd_12_sub_pixel_avg_variance4x4,
1881                    vpx_highbd_sad4x4x4d_bits12)
1882         break;
1883     }
1884   }
1885 }
1886 #endif  // CONFIG_VP9_HIGHBITDEPTH
1887
1888 static void realloc_segmentation_maps(VP9_COMP *cpi) {
1889   VP9_COMMON *const cm = &cpi->common;
1890
1891   // Create the encoder segmentation map and set all entries to 0
1892   vpx_free(cpi->segmentation_map);
1893   CHECK_MEM_ERROR(cm, cpi->segmentation_map,
1894                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
1895
1896   // Create a map used for cyclic background refresh.
1897   if (cpi->cyclic_refresh) vp9_cyclic_refresh_free(cpi->cyclic_refresh);
1898   CHECK_MEM_ERROR(cm, cpi->cyclic_refresh,
1899                   vp9_cyclic_refresh_alloc(cm->mi_rows, cm->mi_cols));
1900
1901   // Create a map used to mark inactive areas.
1902   vpx_free(cpi->active_map.map);
1903   CHECK_MEM_ERROR(cm, cpi->active_map.map,
1904                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
1905
1906   // And a place holder structure is the coding context
1907   // for use if we want to save and restore it
1908   vpx_free(cpi->coding_context.last_frame_seg_map_copy);
1909   CHECK_MEM_ERROR(cm, cpi->coding_context.last_frame_seg_map_copy,
1910                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
1911 }
1912
1913 static void alloc_copy_partition_data(VP9_COMP *cpi) {
1914   VP9_COMMON *const cm = &cpi->common;
1915   if (cpi->prev_partition == NULL) {
1916     CHECK_MEM_ERROR(cm, cpi->prev_partition,
1917                     (BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows,
1918                                              sizeof(*cpi->prev_partition)));
1919   }
1920   if (cpi->prev_segment_id == NULL) {
1921     CHECK_MEM_ERROR(
1922         cm, cpi->prev_segment_id,
1923         (int8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
1924                              sizeof(*cpi->prev_segment_id)));
1925   }
1926   if (cpi->prev_variance_low == NULL) {
1927     CHECK_MEM_ERROR(cm, cpi->prev_variance_low,
1928                     (uint8_t *)vpx_calloc(
1929                         (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) * 25,
1930                         sizeof(*cpi->prev_variance_low)));
1931   }
1932   if (cpi->copied_frame_cnt == NULL) {
1933     CHECK_MEM_ERROR(
1934         cm, cpi->copied_frame_cnt,
1935         (uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
1936                               sizeof(*cpi->copied_frame_cnt)));
1937   }
1938 }
1939
1940 void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
1941   VP9_COMMON *const cm = &cpi->common;
1942   RATE_CONTROL *const rc = &cpi->rc;
1943   int last_w = cpi->oxcf.width;
1944   int last_h = cpi->oxcf.height;
1945
1946   vp9_init_quantizer(cpi);
1947   if (cm->profile != oxcf->profile) cm->profile = oxcf->profile;
1948   cm->bit_depth = oxcf->bit_depth;
1949   cm->color_space = oxcf->color_space;
1950   cm->color_range = oxcf->color_range;
1951
1952   cpi->target_level = oxcf->target_level;
1953   cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX;
1954   set_level_constraint(&cpi->level_constraint,
1955                        get_level_index(cpi->target_level));
1956
1957   if (cm->profile <= PROFILE_1)
1958     assert(cm->bit_depth == VPX_BITS_8);
1959   else
1960     assert(cm->bit_depth > VPX_BITS_8);
1961
1962   cpi->oxcf = *oxcf;
1963 #if CONFIG_VP9_HIGHBITDEPTH
1964   cpi->td.mb.e_mbd.bd = (int)cm->bit_depth;
1965 #endif  // CONFIG_VP9_HIGHBITDEPTH
1966
1967   if ((oxcf->pass == 0) && (oxcf->rc_mode == VPX_Q)) {
1968     rc->baseline_gf_interval = FIXED_GF_INTERVAL;
1969   } else {
1970     rc->baseline_gf_interval = (MIN_GF_INTERVAL + MAX_GF_INTERVAL) / 2;
1971   }
1972
1973   cpi->refresh_golden_frame = 0;
1974   cpi->refresh_last_frame = 1;
1975   cm->refresh_frame_context = 1;
1976   cm->reset_frame_context = 0;
1977
1978   vp9_reset_segment_features(&cm->seg);
1979   vp9_set_high_precision_mv(cpi, 0);
1980
1981   {
1982     int i;
1983
1984     for (i = 0; i < MAX_SEGMENTS; i++)
1985       cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout;
1986   }
1987   cpi->encode_breakout = cpi->oxcf.encode_breakout;
1988
1989   set_rc_buffer_sizes(rc, &cpi->oxcf);
1990
1991   // Under a configuration change, where maximum_buffer_size may change,
1992   // keep buffer level clipped to the maximum allowed buffer size.
1993   rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
1994   rc->buffer_level = VPXMIN(rc->buffer_level, rc->maximum_buffer_size);
1995
1996   // Set up frame rate and related parameters rate control values.
1997   vp9_new_framerate(cpi, cpi->framerate);
1998
1999   // Set absolute upper and lower quality limits
2000   rc->worst_quality = cpi->oxcf.worst_allowed_q;
2001   rc->best_quality = cpi->oxcf.best_allowed_q;
2002
2003   cm->interp_filter = cpi->sf.default_interp_filter;
2004
2005   if (cpi->oxcf.render_width > 0 && cpi->oxcf.render_height > 0) {
2006     cm->render_width = cpi->oxcf.render_width;
2007     cm->render_height = cpi->oxcf.render_height;
2008   } else {
2009     cm->render_width = cpi->oxcf.width;
2010     cm->render_height = cpi->oxcf.height;
2011   }
2012   if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) {
2013     cm->width = cpi->oxcf.width;
2014     cm->height = cpi->oxcf.height;
2015     cpi->external_resize = 1;
2016   }
2017
2018   if (cpi->initial_width) {
2019     int new_mi_size = 0;
2020     vp9_set_mb_mi(cm, cm->width, cm->height);
2021     new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows);
2022     if (cm->mi_alloc_size < new_mi_size) {
2023       vp9_free_context_buffers(cm);
2024       alloc_compressor_data(cpi);
2025       realloc_segmentation_maps(cpi);
2026       cpi->initial_width = cpi->initial_height = 0;
2027       cpi->external_resize = 0;
2028     } else if (cm->mi_alloc_size == new_mi_size &&
2029                (cpi->oxcf.width > last_w || cpi->oxcf.height > last_h)) {
2030       vp9_alloc_loop_filter(cm);
2031     }
2032   }
2033
2034   if (cm->current_video_frame == 0 || last_w != cpi->oxcf.width ||
2035       last_h != cpi->oxcf.height)
2036     update_frame_size(cpi);
2037
2038   if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) {
2039     memset(cpi->consec_zero_mv, 0,
2040            cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
2041     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
2042       vp9_cyclic_refresh_reset_resize(cpi);
2043     rc->rc_1_frame = 0;
2044     rc->rc_2_frame = 0;
2045   }
2046
2047   if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||
2048       ((cpi->svc.number_temporal_layers > 1 ||
2049         cpi->svc.number_spatial_layers > 1) &&
2050        cpi->oxcf.pass != 1)) {
2051     vp9_update_layer_context_change_config(cpi,
2052                                            (int)cpi->oxcf.target_bandwidth);
2053   }
2054
2055   // Check for resetting the rc flags (rc_1_frame, rc_2_frame) if the
2056   // configuration change has a large change in avg_frame_bandwidth.
2057   // For SVC check for resetting based on spatial layer average bandwidth.
2058   // Also reset buffer level to optimal level.
2059   if (cm->current_video_frame > 0) {
2060     if (cpi->use_svc) {
2061       vp9_svc_check_reset_layer_rc_flag(cpi);
2062     } else {
2063       if (rc->avg_frame_bandwidth > (3 * rc->last_avg_frame_bandwidth >> 1) ||
2064           rc->avg_frame_bandwidth < (rc->last_avg_frame_bandwidth >> 1)) {
2065         rc->rc_1_frame = 0;
2066         rc->rc_2_frame = 0;
2067         rc->bits_off_target = rc->optimal_buffer_level;
2068         rc->buffer_level = rc->optimal_buffer_level;
2069       }
2070     }
2071   }
2072
2073   cpi->alt_ref_source = NULL;
2074   rc->is_src_frame_alt_ref = 0;
2075
2076 #if 0
2077   // Experimental RD Code
2078   cpi->frame_distortion = 0;
2079   cpi->last_frame_distortion = 0;
2080 #endif
2081
2082   set_tile_limits(cpi);
2083
2084   cpi->ext_refresh_frame_flags_pending = 0;
2085   cpi->ext_refresh_frame_context_pending = 0;
2086
2087 #if CONFIG_VP9_HIGHBITDEPTH
2088   highbd_set_var_fns(cpi);
2089 #endif
2090
2091   vp9_set_row_mt(cpi);
2092 }
2093
// NOTE(review): despite its name, the value below is ln(2) (0.693147...),
// not log2(e).
#ifndef M_LOG2_E
#define M_LOG2_E 0.693147180559945309417
#endif
// Base-2 logarithm computed as log(x) / ln(2); the divisor is narrowed to
// float before the division. Being a function-like macro, this takes the
// place of any log2f() declared by <math.h> for the rest of this file.
#define log2f(x) (log(x) / (float)M_LOG2_E)
2098
2099 /***********************************************************************
2100  * Read before modifying 'cal_nmvjointsadcost' or 'cal_nmvsadcosts'    *
2101  ***********************************************************************
2102  * The following 2 functions ('cal_nmvjointsadcost' and                *
2103  * 'cal_nmvsadcosts') are used to calculate cost lookup tables         *
2104  * used by 'vp9_diamond_search_sad'. The C implementation of the       *
2105  * function is generic, but the AVX intrinsics optimised version       *
2106  * relies on the following properties of the computed tables:          *
2107  * For cal_nmvjointsadcost:                                            *
2108  *   - mvjointsadcost[1] == mvjointsadcost[2] == mvjointsadcost[3]     *
2109  * For cal_nmvsadcosts:                                                *
2110  *   - For all i: mvsadcost[0][i] == mvsadcost[1][i]                   *
2111  *         (Equal costs for both components)                           *
2112  *   - For all i: mvsadcost[0][i] == mvsadcost[0][-i]                  *
2113  *         (Cost function is even)                                     *
2114  * If these do not hold, then the AVX optimised version of the         *
2115  * 'vp9_diamond_search_sad' function cannot be used as it is, in which *
2116  * case you can revert to using the C function instead.                *
2117  ***********************************************************************/
2118
// Fills the 4-entry joint SAD cost table: entry 0 is set to 600 and entries
// 1..3 are all set to 300.
static void cal_nmvjointsadcost(int *mvjointsadcost) {
  /*********************************************************************
   * Warning: Read the comments above before modifying this function   *
   *********************************************************************/
  int joint;

  mvjointsadcost[0] = 600;
  // Entries 1, 2 and 3 must stay equal (see the warning above).
  for (joint = 1; joint <= 3; ++joint) mvjointsadcost[joint] = 300;
}
2128
2129 static void cal_nmvsadcosts(int *mvsadcost[2]) {
2130   /*********************************************************************
2131    * Warning: Read the comments above before modifying this function   *
2132    *********************************************************************/
2133   int i = 1;
2134
2135   mvsadcost[0][0] = 0;
2136   mvsadcost[1][0] = 0;
2137
2138   do {
2139     double z = 256 * (2 * (log2f(8 * i) + .6));
2140     mvsadcost[0][i] = (int)z;
2141     mvsadcost[1][i] = (int)z;
2142     mvsadcost[0][-i] = (int)z;
2143     mvsadcost[1][-i] = (int)z;
2144   } while (++i <= MV_MAX);
2145 }
2146
2147 static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
2148   int i = 1;
2149
2150   mvsadcost[0][0] = 0;
2151   mvsadcost[1][0] = 0;
2152
2153   do {
2154     double z = 256 * (2 * (log2f(8 * i) + .6));
2155     mvsadcost[0][i] = (int)z;
2156     mvsadcost[1][i] = (int)z;
2157     mvsadcost[0][-i] = (int)z;
2158     mvsadcost[1][-i] = (int)z;
2159   } while (++i <= MV_MAX);
2160 }
2161
2162 VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
2163                                 BufferPool *const pool) {
2164   unsigned int i;
2165   VP9_COMP *volatile const cpi = vpx_memalign(32, sizeof(VP9_COMP));
2166   VP9_COMMON *volatile const cm = cpi != NULL ? &cpi->common : NULL;
2167
2168   if (!cm) return NULL;
2169
2170   vp9_zero(*cpi);
2171
2172   if (setjmp(cm->error.jmp)) {
2173     cm->error.setjmp = 0;
2174     vp9_remove_compressor(cpi);
2175     return 0;
2176   }
2177
2178   cm->error.setjmp = 1;
2179   cm->alloc_mi = vp9_enc_alloc_mi;
2180   cm->free_mi = vp9_enc_free_mi;
2181   cm->setup_mi = vp9_enc_setup_mi;
2182
2183   CHECK_MEM_ERROR(cm, cm->fc, (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc)));
2184   CHECK_MEM_ERROR(
2185       cm, cm->frame_contexts,
2186       (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS, sizeof(*cm->frame_contexts)));
2187
2188   cpi->use_svc = 0;
2189   cpi->resize_state = ORIG;
2190   cpi->external_resize = 0;
2191   cpi->resize_avg_qp = 0;
2192   cpi->resize_buffer_underflow = 0;
2193   cpi->use_skin_detection = 0;
2194   cpi->common.buffer_pool = pool;
2195
2196   cpi->force_update_segmentation = 0;
2197
2198   init_config(cpi, oxcf);
2199   vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
2200
2201   cm->current_video_frame = 0;
2202   cpi->partition_search_skippable_frame = 0;
2203   cpi->tile_data = NULL;
2204
2205   realloc_segmentation_maps(cpi);
2206
2207   CHECK_MEM_ERROR(
2208       cm, cpi->skin_map,
2209       vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(cpi->skin_map[0])));
2210
2211 #if !CONFIG_REALTIME_ONLY
2212   CHECK_MEM_ERROR(cm, cpi->alt_ref_aq, vp9_alt_ref_aq_create());
2213 #endif
2214
2215   CHECK_MEM_ERROR(
2216       cm, cpi->consec_zero_mv,
2217       vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(*cpi->consec_zero_mv)));
2218
2219   CHECK_MEM_ERROR(cm, cpi->nmvcosts[0],
2220                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[0])));
2221   CHECK_MEM_ERROR(cm, cpi->nmvcosts[1],
2222                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[1])));
2223   CHECK_MEM_ERROR(cm, cpi->nmvcosts_hp[0],
2224                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts_hp[0])));
2225   CHECK_MEM_ERROR(cm, cpi->nmvcosts_hp[1],
2226                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts_hp[1])));
2227   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts[0],
2228                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts[0])));
2229   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts[1],
2230                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts[1])));
2231   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[0],
2232                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[0])));
2233   CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[1],
2234                   vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[1])));
2235
2236   for (i = 0; i < (sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]));
2237        i++) {
2238     CHECK_MEM_ERROR(
2239         cm, cpi->mbgraph_stats[i].mb_stats,
2240         vpx_calloc(cm->MBs * sizeof(*cpi->mbgraph_stats[i].mb_stats), 1));
2241   }
2242
2243 #if CONFIG_FP_MB_STATS
2244   cpi->use_fp_mb_stats = 0;
2245   if (cpi->use_fp_mb_stats) {
2246     // a place holder used to store the first pass mb stats in the first pass
2247     CHECK_MEM_ERROR(cm, cpi->twopass.frame_mb_stats_buf,
2248                     vpx_calloc(cm->MBs * sizeof(uint8_t), 1));
2249   } else {
2250     cpi->twopass.frame_mb_stats_buf = NULL;
2251   }
2252 #endif
2253
2254   cpi->refresh_alt_ref_frame = 0;
2255   cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
2256
2257   init_level_info(&cpi->level_info);
2258   init_level_constraint(&cpi->level_constraint);
2259
2260 #if CONFIG_INTERNAL_STATS
2261   cpi->b_calculate_blockiness = 1;
2262   cpi->b_calculate_consistency = 1;
2263   cpi->total_inconsistency = 0;
2264   cpi->psnr.worst = 100.0;
2265   cpi->worst_ssim = 100.0;
2266
2267   cpi->count = 0;
2268   cpi->bytes = 0;
2269
2270   if (cpi->b_calculate_psnr) {
2271     cpi->total_sq_error = 0;
2272     cpi->total_samples = 0;
2273
2274     cpi->totalp_sq_error = 0;
2275     cpi->totalp_samples = 0;
2276
2277     cpi->tot_recode_hits = 0;
2278     cpi->summed_quality = 0;
2279     cpi->summed_weights = 0;
2280     cpi->summedp_quality = 0;
2281     cpi->summedp_weights = 0;
2282   }
2283
2284   cpi->fastssim.worst = 100.0;
2285
2286   cpi->psnrhvs.worst = 100.0;
2287
2288   if (cpi->b_calculate_blockiness) {
2289     cpi->total_blockiness = 0;
2290     cpi->worst_blockiness = 0.0;
2291   }
2292
2293   if (cpi->b_calculate_consistency) {
2294     CHECK_MEM_ERROR(cm, cpi->ssim_vars,
2295                     vpx_calloc(cpi->common.mi_rows * cpi->common.mi_cols,
2296                                sizeof(*cpi->ssim_vars) * 4));
2297     cpi->worst_consistency = 100.0;
2298   } else {
2299     cpi->ssim_vars = NULL;
2300   }
2301
2302 #endif
2303
2304   cpi->first_time_stamp_ever = INT64_MAX;
2305
2306   /*********************************************************************
2307    * Warning: Read the comments around 'cal_nmvjointsadcost' and       *
2308    * 'cal_nmvsadcosts' before modifying how these tables are computed. *
2309    *********************************************************************/
2310   cal_nmvjointsadcost(cpi->td.mb.nmvjointsadcost);
2311   cpi->td.mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX];
2312   cpi->td.mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX];
2313   cpi->td.mb.nmvsadcost[0] = &cpi->nmvsadcosts[0][MV_MAX];
2314   cpi->td.mb.nmvsadcost[1] = &cpi->nmvsadcosts[1][MV_MAX];
2315   cal_nmvsadcosts(cpi->td.mb.nmvsadcost);
2316
2317   cpi->td.mb.nmvcost_hp[0] = &cpi->nmvcosts_hp[0][MV_MAX];
2318   cpi->td.mb.nmvcost_hp[1] = &cpi->nmvcosts_hp[1][MV_MAX];
2319   cpi->td.mb.nmvsadcost_hp[0] = &cpi->nmvsadcosts_hp[0][MV_MAX];
2320   cpi->td.mb.nmvsadcost_hp[1] = &cpi->nmvsadcosts_hp[1][MV_MAX];
2321   cal_nmvsadcosts_hp(cpi->td.mb.nmvsadcost_hp);
2322
2323 #if CONFIG_VP9_TEMPORAL_DENOISING
2324 #ifdef OUTPUT_YUV_DENOISED
2325   yuv_denoised_file = fopen("denoised.yuv", "ab");
2326 #endif
2327 #endif
2328 #ifdef OUTPUT_YUV_SKINMAP
2329   yuv_skinmap_file = fopen("skinmap.yuv", "wb");
2330 #endif
2331 #ifdef OUTPUT_YUV_REC
2332   yuv_rec_file = fopen("rec.yuv", "wb");
2333 #endif
2334 #ifdef OUTPUT_YUV_SVC_SRC
2335   yuv_svc_src[0] = fopen("svc_src_0.yuv", "wb");
2336   yuv_svc_src[1] = fopen("svc_src_1.yuv", "wb");
2337   yuv_svc_src[2] = fopen("svc_src_2.yuv", "wb");
2338 #endif
2339
2340 #if 0
2341   framepsnr = fopen("framepsnr.stt", "a");
2342   kf_list = fopen("kf_list.stt", "w");
2343 #endif
2344
2345   cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
2346
2347 #if !CONFIG_REALTIME_ONLY
2348   if (oxcf->pass == 1) {
2349     vp9_init_first_pass(cpi);
2350   } else if (oxcf->pass == 2) {
2351     const size_t packet_sz = sizeof(FIRSTPASS_STATS);
2352     const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz);
2353
2354     if (cpi->svc.number_spatial_layers > 1 ||
2355         cpi->svc.number_temporal_layers > 1) {
2356       FIRSTPASS_STATS *const stats = oxcf->two_pass_stats_in.buf;
2357       FIRSTPASS_STATS *stats_copy[VPX_SS_MAX_LAYERS] = { 0 };
2358       int i;
2359
2360       for (i = 0; i < oxcf->ss_number_layers; ++i) {
2361         FIRSTPASS_STATS *const last_packet_for_layer =
2362             &stats[packets - oxcf->ss_number_layers + i];
2363         const int layer_id = (int)last_packet_for_layer->spatial_layer_id;
2364         const int packets_in_layer = (int)last_packet_for_layer->count + 1;
2365         if (layer_id >= 0 && layer_id < oxcf->ss_number_layers) {
2366           LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer_id];
2367
2368           vpx_free(lc->rc_twopass_stats_in.buf);
2369
2370           lc->rc_twopass_stats_in.sz = packets_in_layer * packet_sz;
2371           CHECK_MEM_ERROR(cm, lc->rc_twopass_stats_in.buf,
2372                           vpx_malloc(lc->rc_twopass_stats_in.sz));
2373           lc->twopass.stats_in_start = lc->rc_twopass_stats_in.buf;
2374           lc->twopass.stats_in = lc->twopass.stats_in_start;
2375           lc->twopass.stats_in_end =
2376               lc->twopass.stats_in_start + packets_in_layer - 1;
2377           stats_copy[layer_id] = lc->rc_twopass_stats_in.buf;
2378         }
2379       }
2380
2381       for (i = 0; i < packets; ++i) {
2382         const int layer_id = (int)stats[i].spatial_layer_id;
2383         if (layer_id >= 0 && layer_id < oxcf->ss_number_layers &&
2384             stats_copy[layer_id] != NULL) {
2385           *stats_copy[layer_id] = stats[i];
2386           ++stats_copy[layer_id];
2387         }
2388       }
2389
2390       vp9_init_second_pass_spatial_svc(cpi);
2391     } else {
2392 #if CONFIG_FP_MB_STATS
2393       if (cpi->use_fp_mb_stats) {
2394         const size_t psz = cpi->common.MBs * sizeof(uint8_t);
2395         const int ps = (int)(oxcf->firstpass_mb_stats_in.sz / psz);
2396
2397         cpi->twopass.firstpass_mb_stats.mb_stats_start =
2398             oxcf->firstpass_mb_stats_in.buf;
2399         cpi->twopass.firstpass_mb_stats.mb_stats_end =
2400             cpi->twopass.firstpass_mb_stats.mb_stats_start +
2401             (ps - 1) * cpi->common.MBs * sizeof(uint8_t);
2402       }
2403 #endif
2404
2405       cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;
2406       cpi->twopass.stats_in = cpi->twopass.stats_in_start;
2407       cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1];
2408
2409       vp9_init_second_pass(cpi);
2410     }
2411   }
2412 #endif  // !CONFIG_REALTIME_ONLY
2413
2414   cpi->mb_wiener_var_cols = 0;
2415   cpi->mb_wiener_var_rows = 0;
2416   cpi->mb_wiener_variance = NULL;
2417
2418   vp9_set_speed_features_framesize_independent(cpi, oxcf->speed);
2419   vp9_set_speed_features_framesize_dependent(cpi, oxcf->speed);
2420
2421   {
2422     const int bsize = BLOCK_16X16;
2423     const int w = num_8x8_blocks_wide_lookup[bsize];
2424     const int h = num_8x8_blocks_high_lookup[bsize];
2425     const int num_cols = (cm->mi_cols + w - 1) / w;
2426     const int num_rows = (cm->mi_rows + h - 1) / h;
2427     CHECK_MEM_ERROR(cm, cpi->mi_ssim_rdmult_scaling_factors,
2428                     vpx_calloc(num_rows * num_cols,
2429                                sizeof(*cpi->mi_ssim_rdmult_scaling_factors)));
2430   }
2431
2432   cpi->kmeans_data_arr_alloc = 0;
2433 #if CONFIG_NON_GREEDY_MV
2434   cpi->tpl_ready = 0;
2435 #endif  // CONFIG_NON_GREEDY_MV
2436   for (i = 0; i < MAX_ARF_GOP_SIZE; ++i) cpi->tpl_stats[i].tpl_stats_ptr = NULL;
2437
2438   // Allocate memory to store variances for a frame.
2439   CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff)));
2440   cpi->source_var_thresh = 0;
2441   cpi->frames_till_next_var_check = 0;
2442 #define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, SDX8F) \
2443   cpi->fn_ptr[BT].sdf = SDF;                             \
2444   cpi->fn_ptr[BT].sdaf = SDAF;                           \
2445   cpi->fn_ptr[BT].vf = VF;                               \
2446   cpi->fn_ptr[BT].svf = SVF;                             \
2447   cpi->fn_ptr[BT].svaf = SVAF;                           \
2448   cpi->fn_ptr[BT].sdx4df = SDX4DF;                       \
2449   cpi->fn_ptr[BT].sdx8f = SDX8F;
2450
2451   // TODO(angiebird): make sdx8f available for every block size
2452   BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg, vpx_variance32x16,
2453       vpx_sub_pixel_variance32x16, vpx_sub_pixel_avg_variance32x16,
2454       vpx_sad32x16x4d, NULL)
2455
2456   BFP(BLOCK_16X32, vpx_sad16x32, vpx_sad16x32_avg, vpx_variance16x32,
2457       vpx_sub_pixel_variance16x32, vpx_sub_pixel_avg_variance16x32,
2458       vpx_sad16x32x4d, NULL)
2459
2460   BFP(BLOCK_64X32, vpx_sad64x32, vpx_sad64x32_avg, vpx_variance64x32,
2461       vpx_sub_pixel_variance64x32, vpx_sub_pixel_avg_variance64x32,
2462       vpx_sad64x32x4d, NULL)
2463
2464   BFP(BLOCK_32X64, vpx_sad32x64, vpx_sad32x64_avg, vpx_variance32x64,
2465       vpx_sub_pixel_variance32x64, vpx_sub_pixel_avg_variance32x64,
2466       vpx_sad32x64x4d, NULL)
2467
2468   BFP(BLOCK_32X32, vpx_sad32x32, vpx_sad32x32_avg, vpx_variance32x32,
2469       vpx_sub_pixel_variance32x32, vpx_sub_pixel_avg_variance32x32,
2470       vpx_sad32x32x4d, vpx_sad32x32x8)
2471
2472   BFP(BLOCK_64X64, vpx_sad64x64, vpx_sad64x64_avg, vpx_variance64x64,
2473       vpx_sub_pixel_variance64x64, vpx_sub_pixel_avg_variance64x64,
2474       vpx_sad64x64x4d, NULL)
2475
2476   BFP(BLOCK_16X16, vpx_sad16x16, vpx_sad16x16_avg, vpx_variance16x16,
2477       vpx_sub_pixel_variance16x16, vpx_sub_pixel_avg_variance16x16,
2478       vpx_sad16x16x4d, vpx_sad16x16x8)
2479
2480   BFP(BLOCK_16X8, vpx_sad16x8, vpx_sad16x8_avg, vpx_variance16x8,
2481       vpx_sub_pixel_variance16x8, vpx_sub_pixel_avg_variance16x8,
2482       vpx_sad16x8x4d, vpx_sad16x8x8)
2483
2484   BFP(BLOCK_8X16, vpx_sad8x16, vpx_sad8x16_avg, vpx_variance8x16,
2485       vpx_sub_pixel_variance8x16, vpx_sub_pixel_avg_variance8x16,
2486       vpx_sad8x16x4d, vpx_sad8x16x8)
2487
2488   BFP(BLOCK_8X8, vpx_sad8x8, vpx_sad8x8_avg, vpx_variance8x8,
2489       vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d,
2490       vpx_sad8x8x8)
2491
2492   BFP(BLOCK_8X4, vpx_sad8x4, vpx_sad8x4_avg, vpx_variance8x4,
2493       vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d,
2494       NULL)
2495
2496   BFP(BLOCK_4X8, vpx_sad4x8, vpx_sad4x8_avg, vpx_variance4x8,
2497       vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d,
2498       NULL)
2499
2500   BFP(BLOCK_4X4, vpx_sad4x4, vpx_sad4x4_avg, vpx_variance4x4,
2501       vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d,
2502       vpx_sad4x4x8)
2503
2504 #if CONFIG_VP9_HIGHBITDEPTH
2505   highbd_set_var_fns(cpi);
2506 #endif
2507
2508   /* vp9_init_quantizer() is first called here. Add check in
2509    * vp9_frame_init_quantizer() so that vp9_init_quantizer is only
2510    * called later when needed. This will avoid unnecessary calls of
2511    * vp9_init_quantizer() for every frame.
2512    */
2513   vp9_init_quantizer(cpi);
2514
2515   vp9_loop_filter_init(cm);
2516
2517   // Set up the unit scaling factor used during motion search.
2518 #if CONFIG_VP9_HIGHBITDEPTH
2519   vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height,
2520                                     cm->width, cm->height,
2521                                     cm->use_highbitdepth);
2522 #else
2523   vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height,
2524                                     cm->width, cm->height);
2525 #endif  // CONFIG_VP9_HIGHBITDEPTH
2526   cpi->td.mb.me_sf = &cpi->me_sf;
2527
2528   cm->error.setjmp = 0;
2529
2530   return cpi;
2531 }
2532
2533 #if CONFIG_INTERNAL_STATS
// Expands to the (write position, remaining capacity) argument pair used to
// append to the NUL-terminated char array H. H must be a real array rather
// than a pointer, because sizeof(H) is taken as its capacity.
#define SNPRINT_TAIL(H) (H) + strlen(H), sizeof(H) - strlen(H)

// Appends the text T to H, truncating at the end of H.
#define SNPRINT(H, T) snprintf(SNPRINT_TAIL(H), (T))

// Appends T, used as a format string consuming the single argument V, to H.
#define SNPRINT2(H, T, V) snprintf(SNPRINT_TAIL(H), (T), (V))
2538 #endif  // CONFIG_INTERNAL_STATS
2539
2540 static void free_tpl_buffer(VP9_COMP *cpi);
2541
2542 void vp9_remove_compressor(VP9_COMP *cpi) {
2543   VP9_COMMON *cm;
2544   unsigned int i;
2545   int t;
2546
2547   if (!cpi) return;
2548
2549 #if CONFIG_INTERNAL_STATS
2550   vpx_free(cpi->ssim_vars);
2551 #endif
2552
2553   cm = &cpi->common;
2554   if (cm->current_video_frame > 0) {
2555 #if CONFIG_INTERNAL_STATS
2556     vpx_clear_system_state();
2557
2558     if (cpi->oxcf.pass != 1) {
2559       char headings[512] = { 0 };
2560       char results[512] = { 0 };
2561       FILE *f = fopen("opsnr.stt", "a");
2562       double time_encoded =
2563           (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) /
2564           10000000.000;
2565       double total_encode_time =
2566           (cpi->time_receive_data + cpi->time_compress_data) / 1000.000;
2567       const double dr =
2568           (double)cpi->bytes * (double)8 / (double)1000 / time_encoded;
2569       const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
2570       const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000;
2571       const double rate_err = ((100.0 * (dr - target_rate)) / target_rate);
2572
2573       if (cpi->b_calculate_psnr) {
2574         const double total_psnr = vpx_sse_to_psnr(
2575             (double)cpi->total_samples, peak, (double)cpi->total_sq_error);
2576         const double totalp_psnr = vpx_sse_to_psnr(
2577             (double)cpi->totalp_samples, peak, (double)cpi->totalp_sq_error);
2578         const double total_ssim =
2579             100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0);
2580         const double totalp_ssim =
2581             100 * pow(cpi->summedp_quality / cpi->summedp_weights, 8.0);
2582
2583         snprintf(headings, sizeof(headings),
2584                  "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t"
2585                  "VPXSSIM\tVPSSIMP\tFASTSIM\tPSNRHVS\t"
2586                  "WstPsnr\tWstSsim\tWstFast\tWstHVS\t"
2587                  "AVPsnrY\tAPsnrCb\tAPsnrCr");
2588         snprintf(results, sizeof(results),
2589                  "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
2590                  "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
2591                  "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
2592                  "%7.3f\t%7.3f\t%7.3f",
2593                  dr, cpi->psnr.stat[ALL] / cpi->count, total_psnr,
2594                  cpi->psnrp.stat[ALL] / cpi->count, totalp_psnr, total_ssim,
2595                  totalp_ssim, cpi->fastssim.stat[ALL] / cpi->count,
2596                  cpi->psnrhvs.stat[ALL] / cpi->count, cpi->psnr.worst,
2597                  cpi->worst_ssim, cpi->fastssim.worst, cpi->psnrhvs.worst,
2598                  cpi->psnr.stat[Y] / cpi->count, cpi->psnr.stat[U] / cpi->count,
2599                  cpi->psnr.stat[V] / cpi->count);
2600
2601         if (cpi->b_calculate_blockiness) {
2602           SNPRINT(headings, "\t  Block\tWstBlck");
2603           SNPRINT2(results, "\t%7.3f", cpi->total_blockiness / cpi->count);
2604           SNPRINT2(results, "\t%7.3f", cpi->worst_blockiness);
2605         }
2606
2607         if (cpi->b_calculate_consistency) {
2608           double consistency =
2609               vpx_sse_to_psnr((double)cpi->totalp_samples, peak,
2610                               (double)cpi->total_inconsistency);
2611
2612           SNPRINT(headings, "\tConsist\tWstCons");
2613           SNPRINT2(results, "\t%7.3f", consistency);
2614           SNPRINT2(results, "\t%7.3f", cpi->worst_consistency);
2615         }
2616
2617         SNPRINT(headings, "\t    Time\tRcErr\tAbsErr");
2618         SNPRINT2(results, "\t%8.0f", total_encode_time);
2619         SNPRINT2(results, "\t%7.2f", rate_err);
2620         SNPRINT2(results, "\t%7.2f", fabs(rate_err));
2621
2622         fprintf(f, "%s\tAPsnr611\n", headings);
2623         fprintf(
2624             f, "%s\t%7.3f\n", results,
2625             (6 * cpi->psnr.stat[Y] + cpi->psnr.stat[U] + cpi->psnr.stat[V]) /
2626                 (cpi->count * 8));
2627       }
2628
2629       fclose(f);
2630     }
2631 #endif
2632
2633 #if 0
2634     {
2635       printf("\n_pick_loop_filter_level:%d\n", cpi->time_pick_lpf / 1000);
2636       printf("\n_frames recive_data encod_mb_row compress_frame  Total\n");
2637       printf("%6d %10ld %10ld %10ld %10ld\n", cpi->common.current_video_frame,
2638              cpi->time_receive_data / 1000, cpi->time_encode_sb_row / 1000,
2639              cpi->time_compress_data / 1000,
2640              (cpi->time_receive_data + cpi->time_compress_data) / 1000);
2641     }
2642 #endif
2643   }
2644
2645 #if CONFIG_VP9_TEMPORAL_DENOISING
2646   vp9_denoiser_free(&(cpi->denoiser));
2647 #endif
2648
2649   if (cpi->kmeans_data_arr_alloc) {
2650 #if CONFIG_MULTITHREAD
2651     pthread_mutex_destroy(&cpi->kmeans_mutex);
2652 #endif
2653     vpx_free(cpi->kmeans_data_arr);
2654   }
2655
2656   free_tpl_buffer(cpi);
2657
2658   for (t = 0; t < cpi->num_workers; ++t) {
2659     VPxWorker *const worker = &cpi->workers[t];
2660     EncWorkerData *const thread_data = &cpi->tile_thr_data[t];
2661
2662     // Deallocate allocated threads.
2663     vpx_get_worker_interface()->end(worker);
2664
2665     // Deallocate allocated thread data.
2666     if (t < cpi->num_workers - 1) {
2667       vpx_free(thread_data->td->counts);
2668       vp9_free_pc_tree(thread_data->td);
2669       vpx_free(thread_data->td);
2670     }
2671   }
2672   vpx_free(cpi->tile_thr_data);
2673   vpx_free(cpi->workers);
2674   vp9_row_mt_mem_dealloc(cpi);
2675
2676   if (cpi->num_workers > 1) {
2677     vp9_loop_filter_dealloc(&cpi->lf_row_sync);
2678     vp9_bitstream_encode_tiles_buffer_dealloc(cpi);
2679   }
2680
2681 #if !CONFIG_REALTIME_ONLY
2682   vp9_alt_ref_aq_destroy(cpi->alt_ref_aq);
2683 #endif
2684
2685   dealloc_compressor_data(cpi);
2686
2687   for (i = 0; i < sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]);
2688        ++i) {
2689     vpx_free(cpi->mbgraph_stats[i].mb_stats);
2690   }
2691
2692 #if CONFIG_FP_MB_STATS
2693   if (cpi->use_fp_mb_stats) {
2694     vpx_free(cpi->twopass.frame_mb_stats_buf);
2695     cpi->twopass.frame_mb_stats_buf = NULL;
2696   }
2697 #endif
2698
2699   vp9_remove_common(cm);
2700   vp9_free_ref_frame_buffers(cm->buffer_pool);
2701 #if CONFIG_VP9_POSTPROC
2702   vp9_free_postproc_buffers(cm);
2703 #endif
2704   vpx_free(cpi);
2705
2706 #if CONFIG_VP9_TEMPORAL_DENOISING
2707 #ifdef OUTPUT_YUV_DENOISED
2708   fclose(yuv_denoised_file);
2709 #endif
2710 #endif
2711 #ifdef OUTPUT_YUV_SKINMAP
2712   fclose(yuv_skinmap_file);
2713 #endif
2714 #ifdef OUTPUT_YUV_REC
2715   fclose(yuv_rec_file);
2716 #endif
2717 #ifdef OUTPUT_YUV_SVC_SRC
2718   fclose(yuv_svc_src[0]);
2719   fclose(yuv_svc_src[1]);
2720   fclose(yuv_svc_src[2]);
2721 #endif
2722
2723 #if 0
2724
2725   if (keyfile)
2726     fclose(keyfile);
2727
2728   if (framepsnr)
2729     fclose(framepsnr);
2730
2731   if (kf_list)
2732     fclose(kf_list);
2733
2734 #endif
2735 }
2736
2737 static void generate_psnr_packet(VP9_COMP *cpi) {
2738   struct vpx_codec_cx_pkt pkt;
2739   int i;
2740   PSNR_STATS psnr;
2741 #if CONFIG_VP9_HIGHBITDEPTH
2742   vpx_calc_highbd_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, &psnr,
2743                        cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth);
2744 #else
2745   vpx_calc_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, &psnr);
2746 #endif
2747
2748   for (i = 0; i < 4; ++i) {
2749     pkt.data.psnr.samples[i] = psnr.samples[i];
2750     pkt.data.psnr.sse[i] = psnr.sse[i];
2751     pkt.data.psnr.psnr[i] = psnr.psnr[i];
2752   }
2753   pkt.kind = VPX_CODEC_PSNR_PKT;
2754   if (cpi->use_svc)
2755     cpi->svc
2756         .layer_context[cpi->svc.spatial_layer_id *
2757                        cpi->svc.number_temporal_layers]
2758         .psnr_pkt = pkt.data.psnr;
2759   else
2760     vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt);
2761 }
2762
2763 int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags) {
2764   if (ref_frame_flags > 7) return -1;
2765
2766   cpi->ref_frame_flags = ref_frame_flags;
2767   return 0;
2768 }
2769
2770 void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags) {
2771   cpi->ext_refresh_golden_frame = (ref_frame_flags & VP9_GOLD_FLAG) != 0;
2772   cpi->ext_refresh_alt_ref_frame = (ref_frame_flags & VP9_ALT_FLAG) != 0;
2773   cpi->ext_refresh_last_frame = (ref_frame_flags & VP9_LAST_FLAG) != 0;
2774   cpi->ext_refresh_frame_flags_pending = 1;
2775 }
2776
2777 static YV12_BUFFER_CONFIG *get_vp9_ref_frame_buffer(
2778     VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag) {
2779   MV_REFERENCE_FRAME ref_frame = NONE;
2780   if (ref_frame_flag == VP9_LAST_FLAG)
2781     ref_frame = LAST_FRAME;
2782   else if (ref_frame_flag == VP9_GOLD_FLAG)
2783     ref_frame = GOLDEN_FRAME;
2784   else if (ref_frame_flag == VP9_ALT_FLAG)
2785     ref_frame = ALTREF_FRAME;
2786
2787   return ref_frame == NONE ? NULL : get_ref_frame_buffer(cpi, ref_frame);
2788 }
2789
2790 int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
2791                            YV12_BUFFER_CONFIG *sd) {
2792   YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag);
2793   if (cfg) {
2794     vpx_yv12_copy_frame(cfg, sd);
2795     return 0;
2796   } else {
2797     return -1;
2798   }
2799 }
2800
2801 int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
2802                           YV12_BUFFER_CONFIG *sd) {
2803   YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag);
2804   if (cfg) {
2805     vpx_yv12_copy_frame(sd, cfg);
2806     return 0;
2807   } else {
2808     return -1;
2809   }
2810 }
2811
2812 int vp9_update_entropy(VP9_COMP *cpi, int update) {
2813   cpi->ext_refresh_frame_context = update;
2814   cpi->ext_refresh_frame_context_pending = 1;
2815   return 0;
2816 }
2817
2818 #ifdef OUTPUT_YUV_REC
2819 void vp9_write_yuv_rec_frame(VP9_COMMON *cm) {
2820   YV12_BUFFER_CONFIG *s = cm->frame_to_show;
2821   uint8_t *src = s->y_buffer;
2822   int h = cm->height;
2823
2824 #if CONFIG_VP9_HIGHBITDEPTH
2825   if (s->flags & YV12_FLAG_HIGHBITDEPTH) {
2826     uint16_t *src16 = CONVERT_TO_SHORTPTR(s->y_buffer);
2827
2828     do {
2829       fwrite(src16, s->y_width, 2, yuv_rec_file);
2830       src16 += s->y_stride;
2831     } while (--h);
2832
2833     src16 = CONVERT_TO_SHORTPTR(s->u_buffer);
2834     h = s->uv_height;
2835
2836     do {
2837       fwrite(src16, s->uv_width, 2, yuv_rec_file);
2838       src16 += s->uv_stride;
2839     } while (--h);
2840
2841     src16 = CONVERT_TO_SHORTPTR(s->v_buffer);
2842     h = s->uv_height;
2843
2844     do {
2845       fwrite(src16, s->uv_width, 2, yuv_rec_file);
2846       src16 += s->uv_stride;
2847     } while (--h);
2848
2849     fflush(yuv_rec_file);
2850     return;
2851   }
2852 #endif  // CONFIG_VP9_HIGHBITDEPTH
2853
2854   do {
2855     fwrite(src, s->y_width, 1, yuv_rec_file);
2856     src += s->y_stride;
2857   } while (--h);
2858
2859   src = s->u_buffer;
2860   h = s->uv_height;
2861
2862   do {
2863     fwrite(src, s->uv_width, 1, yuv_rec_file);
2864     src += s->uv_stride;
2865   } while (--h);
2866
2867   src = s->v_buffer;
2868   h = s->uv_height;
2869
2870   do {
2871     fwrite(src, s->uv_width, 1, yuv_rec_file);
2872     src += s->uv_stride;
2873   } while (--h);
2874
2875   fflush(yuv_rec_file);
2876 }
2877 #endif
2878
2879 #if CONFIG_VP9_HIGHBITDEPTH
2880 static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
2881                                                 YV12_BUFFER_CONFIG *dst,
2882                                                 int bd) {
2883 #else
2884 static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
2885                                                 YV12_BUFFER_CONFIG *dst) {
2886 #endif  // CONFIG_VP9_HIGHBITDEPTH
2887   // TODO(dkovalev): replace YV12_BUFFER_CONFIG with vpx_image_t
2888   int i;
2889   const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer,
2890                                    src->v_buffer };
2891   const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
2892   const int src_widths[3] = { src->y_crop_width, src->uv_crop_width,
2893                               src->uv_crop_width };
2894   const int src_heights[3] = { src->y_crop_height, src->uv_crop_height,
2895                                src->uv_crop_height };
2896   uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer };
2897   const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride };
2898   const int dst_widths[3] = { dst->y_crop_width, dst->uv_crop_width,
2899                               dst->uv_crop_width };
2900   const int dst_heights[3] = { dst->y_crop_height, dst->uv_crop_height,
2901                                dst->uv_crop_height };
2902
2903   for (i = 0; i < MAX_MB_PLANE; ++i) {
2904 #if CONFIG_VP9_HIGHBITDEPTH
2905     if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
2906       vp9_highbd_resize_plane(srcs[i], src_heights[i], src_widths[i],
2907                               src_strides[i], dsts[i], dst_heights[i],
2908                               dst_widths[i], dst_strides[i], bd);
2909     } else {
2910       vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
2911                        dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
2912     }
2913 #else
2914     vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
2915                      dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
2916 #endif  // CONFIG_VP9_HIGHBITDEPTH
2917   }
2918   vpx_extend_frame_borders(dst);
2919 }
2920
#if CONFIG_VP9_HIGHBITDEPTH
// Scales |src| into |dst| with the interpolation kernel selected by
// |filter_type|, then extends the borders of |dst|. The destination is walked
// in steps of 16 luma samples in each direction; planes other than 0 use a
// subsampling factor of 2. |phase_scaler| is added to the 1/16-sample source
// position of every block. Buffers flagged YV12_FLAG_HIGHBITDEPTH are filtered
// with vpx_highbd_convolve8() at bit depth |bd|, all others with
// vpx_scaled_2d().
static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
                                   YV12_BUFFER_CONFIG *dst, int bd,
                                   INTERP_FILTER filter_type,
                                   int phase_scaler) {
  const int in_w = src->y_crop_width;
  const int in_h = src->y_crop_height;
  const int out_w = dst->y_crop_width;
  const int out_h = dst->y_crop_height;
  const uint8_t *const in_planes[3] = { src->y_buffer, src->u_buffer,
                                        src->v_buffer };
  const int in_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
  uint8_t *const out_planes[3] = { dst->y_buffer, dst->u_buffer,
                                   dst->v_buffer };
  const int out_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride };
  const InterpKernel *const kernel = vp9_filter_kernels[filter_type];
  const int is_highbd = (src->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
  int plane, row, col;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    const int factor = (plane == 0 || plane == 3 ? 1 : 2);
    // Side length of the block written per step in this plane.
    const int blk = 16 / factor;
    const int in_stride = in_strides[plane];
    const int out_stride = out_strides[plane];

    for (row = 0; row < out_h; row += 16) {
      // The divisions by out_h / out_w are kept inside the loops, where the
      // loop conditions guarantee a positive divisor.
      const int y_q4 = row * blk * in_h / out_h + phase_scaler;
      const int y_step_q4 = 16 * in_h / out_h;
      const int in_row_off = (row / factor) * in_h / out_h * in_stride;
      const int out_row_off = (row / factor) * out_stride;

      for (col = 0; col < out_w; col += 16) {
        const int x_q4 = col * blk * in_w / out_w + phase_scaler;
        const int x_step_q4 = 16 * in_w / out_w;
        const uint8_t *in_ptr =
            in_planes[plane] + in_row_off + (col / factor) * in_w / out_w;
        uint8_t *out_ptr = out_planes[plane] + out_row_off + (col / factor);

        if (is_highbd) {
          vpx_highbd_convolve8(CONVERT_TO_SHORTPTR(in_ptr), in_stride,
                               CONVERT_TO_SHORTPTR(out_ptr), out_stride, kernel,
                               x_q4 & 0xf, x_step_q4, y_q4 & 0xf, y_step_q4,
                               blk, blk, bd);
        } else {
          vpx_scaled_2d(in_ptr, in_stride, out_ptr, out_stride, kernel,
                        x_q4 & 0xf, x_step_q4, y_q4 & 0xf, y_step_q4, blk,
                        blk);
        }
      }
    }
  }

  vpx_extend_frame_borders(dst);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
2969
2970 #if !CONFIG_REALTIME_ONLY
2971 static int scale_down(VP9_COMP *cpi, int q) {
2972   RATE_CONTROL *const rc = &cpi->rc;
2973   GF_GROUP *const gf_group = &cpi->twopass.gf_group;
2974   int scale = 0;
2975   assert(frame_is_kf_gf_arf(cpi));
2976
2977   if (rc->frame_size_selector == UNSCALED &&
2978       q >= rc->rf_level_maxq[gf_group->rf_level[gf_group->index]]) {
2979     const int max_size_thresh =
2980         (int)(rate_thresh_mult[SCALE_STEP1] *
2981               VPXMAX(rc->this_frame_target, rc->avg_frame_bandwidth));
2982     scale = rc->projected_frame_size > max_size_thresh ? 1 : 0;
2983   }
2984   return scale;
2985 }
2986
2987 static int big_rate_miss_high_threshold(VP9_COMP *cpi) {
2988   const RATE_CONTROL *const rc = &cpi->rc;
2989   int big_miss_high;
2990
2991   if (frame_is_kf_gf_arf(cpi))
2992     big_miss_high = rc->this_frame_target * 3 / 2;
2993   else
2994     big_miss_high = rc->this_frame_target * 2;
2995
2996   return big_miss_high;
2997 }
2998
2999 static int big_rate_miss(VP9_COMP *cpi) {
3000   const RATE_CONTROL *const rc = &cpi->rc;
3001   int big_miss_high;
3002   int big_miss_low;
3003
3004   // Ignore for overlay frames
3005   if (rc->is_src_frame_alt_ref) {
3006     return 0;
3007   } else {
3008     big_miss_low = (rc->this_frame_target / 2);
3009     big_miss_high = big_rate_miss_high_threshold(cpi);
3010
3011     return (rc->projected_frame_size > big_miss_high) ||
3012            (rc->projected_frame_size < big_miss_low);
3013   }
3014 }
3015
3016 // test in two pass for the first
3017 static int two_pass_first_group_inter(VP9_COMP *cpi) {
3018   if (cpi->oxcf.pass == 2) {
3019     TWO_PASS *const twopass = &cpi->twopass;
3020     GF_GROUP *const gf_group = &twopass->gf_group;
3021     const int gfg_index = gf_group->index;
3022
3023     if (gfg_index == 0) return gf_group->update_type[gfg_index] == LF_UPDATE;
3024     return gf_group->update_type[gfg_index - 1] != LF_UPDATE &&
3025            gf_group->update_type[gfg_index] == LF_UPDATE;
3026   } else {
3027     return 0;
3028   }
3029 }
3030
3031 // Function to test for conditions that indicate we should loop
3032 // back and recode a frame.
3033 static int recode_loop_test(VP9_COMP *cpi, int high_limit, int low_limit, int q,
3034                             int maxq, int minq) {
3035   const RATE_CONTROL *const rc = &cpi->rc;
3036   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
3037   const int frame_is_kfgfarf = frame_is_kf_gf_arf(cpi);
3038   int force_recode = 0;
3039
3040   if ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
3041       big_rate_miss(cpi) || (cpi->sf.recode_loop == ALLOW_RECODE) ||
3042       (two_pass_first_group_inter(cpi) &&
3043        (cpi->sf.recode_loop == ALLOW_RECODE_FIRST)) ||
3044       (frame_is_kfgfarf && (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF))) {
3045     if (frame_is_kfgfarf && (oxcf->resize_mode == RESIZE_DYNAMIC) &&
3046         scale_down(cpi, q)) {
3047       // Code this group at a lower resolution.
3048       cpi->resize_pending = 1;
3049       return 1;
3050     }
3051
3052     // Force recode for extreme overshoot.
3053     if ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
3054         (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF &&
3055          rc->projected_frame_size >= big_rate_miss_high_threshold(cpi))) {
3056       return 1;
3057     }
3058
3059     // TODO(agrange) high_limit could be greater than the scale-down threshold.
3060     if ((rc->projected_frame_size > high_limit && q < maxq) ||
3061         (rc->projected_frame_size < low_limit && q > minq)) {
3062       force_recode = 1;
3063     } else if (cpi->oxcf.rc_mode == VPX_CQ) {
3064       // Deal with frame undershoot and whether or not we are
3065       // below the automatically set cq level.
3066       if (q > oxcf->cq_level &&
3067           rc->projected_frame_size < ((rc->this_frame_target * 7) >> 3)) {
3068         force_recode = 1;
3069       }
3070     }
3071   }
3072   return force_recode;
3073 }
3074 #endif  // !CONFIG_REALTIME_ONLY
3075
3076 static void update_ref_frames(VP9_COMP *cpi) {
3077   VP9_COMMON *const cm = &cpi->common;
3078   BufferPool *const pool = cm->buffer_pool;
3079   GF_GROUP *const gf_group = &cpi->twopass.gf_group;
3080
3081   if (cpi->rc.show_arf_as_gld) {
3082     int tmp = cpi->alt_fb_idx;
3083     cpi->alt_fb_idx = cpi->gld_fb_idx;
3084     cpi->gld_fb_idx = tmp;
3085   } else if (cm->show_existing_frame) {
3086     // Pop ARF.
3087     cpi->lst_fb_idx = cpi->alt_fb_idx;
3088     cpi->alt_fb_idx =
3089         stack_pop(gf_group->arf_index_stack, gf_group->stack_size);
3090     --gf_group->stack_size;
3091   }
3092
3093   // At this point the new frame has been encoded.
3094   // If any buffer copy / swapping is signaled it should be done here.
3095   if (cm->frame_type == KEY_FRAME) {
3096     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
3097                cm->new_fb_idx);
3098     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx],
3099                cm->new_fb_idx);
3100   } else if (vp9_preserve_existing_gf(cpi)) {
3101     // We have decided to preserve the previously existing golden frame as our
3102     // new ARF frame. However, in the short term in function
3103     // vp9_get_refresh_mask() we left it in the GF slot and, if
3104     // we're updating the GF with the current decoded frame, we save it to the
3105     // ARF slot instead.
3106     // We now have to update the ARF with the current frame and swap gld_fb_idx
3107     // and alt_fb_idx so that, overall, we've stored the old GF in the new ARF
3108     // slot and, if we're updating the GF, the current frame becomes the new GF.
3109     int tmp;
3110
3111     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx],
3112                cm->new_fb_idx);
3113
3114     tmp = cpi->alt_fb_idx;
3115     cpi->alt_fb_idx = cpi->gld_fb_idx;
3116     cpi->gld_fb_idx = tmp;
3117   } else { /* For non key/golden frames */
3118     if (cpi->refresh_alt_ref_frame) {
3119       int arf_idx = gf_group->top_arf_idx;
3120
3121       // Push new ARF into stack.
3122       stack_push(gf_group->arf_index_stack, cpi->alt_fb_idx,
3123                  gf_group->stack_size);
3124       ++gf_group->stack_size;
3125
3126       assert(arf_idx < REF_FRAMES);
3127
3128       ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx);
3129       memcpy(cpi->interp_filter_selected[ALTREF_FRAME],
3130              cpi->interp_filter_selected[0],
3131              sizeof(cpi->interp_filter_selected[0]));
3132
3133       cpi->alt_fb_idx = arf_idx;
3134     }
3135
3136     if (cpi->refresh_golden_frame) {
3137       ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
3138                  cm->new_fb_idx);
3139       if (!cpi->rc.is_src_frame_alt_ref)
3140         memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
3141                cpi->interp_filter_selected[0],
3142                sizeof(cpi->interp_filter_selected[0]));
3143       else
3144         memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
3145                cpi->interp_filter_selected[ALTREF_FRAME],
3146                sizeof(cpi->interp_filter_selected[ALTREF_FRAME]));
3147     }
3148   }
3149
3150   if (cpi->refresh_last_frame) {
3151     ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx],
3152                cm->new_fb_idx);
3153     if (!cpi->rc.is_src_frame_alt_ref)
3154       memcpy(cpi->interp_filter_selected[LAST_FRAME],
3155              cpi->interp_filter_selected[0],
3156              sizeof(cpi->interp_filter_selected[0]));
3157   }
3158
3159   if (gf_group->update_type[gf_group->index] == MID_OVERLAY_UPDATE) {
3160     cpi->alt_fb_idx =
3161         stack_pop(gf_group->arf_index_stack, gf_group->stack_size);
3162     --gf_group->stack_size;
3163   }
3164 }
3165
3166 void vp9_update_reference_frames(VP9_COMP *cpi) {
3167   update_ref_frames(cpi);
3168
3169 #if CONFIG_VP9_TEMPORAL_DENOISING
3170   vp9_denoiser_update_ref_frame(cpi);
3171 #endif
3172
3173   if (is_one_pass_cbr_svc(cpi)) vp9_svc_update_ref_frame(cpi);
3174 }
3175
3176 static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
3177   MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
3178   struct loopfilter *lf = &cm->lf;
3179   int is_reference_frame =
3180       (cm->frame_type == KEY_FRAME || cpi->refresh_last_frame ||
3181        cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
3182   if (cpi->use_svc &&
3183       cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS)
3184     is_reference_frame = !cpi->svc.non_reference_frame;
3185
3186   // Skip loop filter in show_existing_frame mode.
3187   if (cm->show_existing_frame) {
3188     lf->filter_level = 0;
3189     return;
3190   }
3191
3192   if (xd->lossless) {
3193     lf->filter_level = 0;
3194     lf->last_filt_level = 0;
3195   } else {
3196     struct vpx_usec_timer timer;
3197
3198     vpx_clear_system_state();
3199
3200     vpx_usec_timer_start(&timer);
3201
3202     if (!cpi->rc.is_src_frame_alt_ref) {
3203       if ((cpi->common.frame_type == KEY_FRAME) &&
3204           (!cpi->rc.this_key_frame_forced)) {
3205         lf->last_filt_level = 0;
3206       }
3207       vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick);
3208       lf->last_filt_level = lf->filter_level;
3209     } else {
3210       lf->filter_level = 0;
3211     }
3212
3213     vpx_usec_timer_mark(&timer);
3214     cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer);
3215   }
3216
3217   if (lf->filter_level > 0 && is_reference_frame) {
3218     vp9_build_mask_frame(cm, lf->filter_level, 0);
3219
3220     if (cpi->num_workers > 1)
3221       vp9_loop_filter_frame_mt(cm->frame_to_show, cm, xd->plane,
3222                                lf->filter_level, 0, 0, cpi->workers,
3223                                cpi->num_workers, &cpi->lf_row_sync);
3224     else
3225       vp9_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
3226   }
3227
3228   vpx_extend_frame_inner_borders(cm->frame_to_show);
3229 }
3230
3231 static INLINE void alloc_frame_mvs(VP9_COMMON *const cm, int buffer_idx) {
3232   RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx];
3233   if (new_fb_ptr->mvs == NULL || new_fb_ptr->mi_rows < cm->mi_rows ||
3234       new_fb_ptr->mi_cols < cm->mi_cols) {
3235     vpx_free(new_fb_ptr->mvs);
3236     CHECK_MEM_ERROR(cm, new_fb_ptr->mvs,
3237                     (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
3238                                          sizeof(*new_fb_ptr->mvs)));
3239     new_fb_ptr->mi_rows = cm->mi_rows;
3240     new_fb_ptr->mi_cols = cm->mi_cols;
3241   }
3242 }
3243
3244 void vp9_scale_references(VP9_COMP *cpi) {
3245   VP9_COMMON *cm = &cpi->common;
3246   MV_REFERENCE_FRAME ref_frame;
3247   const VP9_REFFRAME ref_mask[3] = { VP9_LAST_FLAG, VP9_GOLD_FLAG,
3248                                      VP9_ALT_FLAG };
3249
3250   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3251     // Need to convert from VP9_REFFRAME to index into ref_mask (subtract 1).
3252     if (cpi->ref_frame_flags & ref_mask[ref_frame - 1]) {
3253       BufferPool *const pool = cm->buffer_pool;
3254       const YV12_BUFFER_CONFIG *const ref =
3255           get_ref_frame_buffer(cpi, ref_frame);
3256
3257       if (ref == NULL) {
3258         cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
3259         continue;
3260       }
3261
3262 #if CONFIG_VP9_HIGHBITDEPTH
3263       if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
3264         RefCntBuffer *new_fb_ptr = NULL;
3265         int force_scaling = 0;
3266         int new_fb = cpi->scaled_ref_idx[ref_frame - 1];
3267         if (new_fb == INVALID_IDX) {
3268           new_fb = get_free_fb(cm);
3269           force_scaling = 1;
3270         }
3271         if (new_fb == INVALID_IDX) return;
3272         new_fb_ptr = &pool->frame_bufs[new_fb];
3273         if (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width ||
3274             new_fb_ptr->buf.y_crop_height != cm->height) {
3275           if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height,
3276                                        cm->subsampling_x, cm->subsampling_y,
3277                                        cm->use_highbitdepth,
3278                                        VP9_ENC_BORDER_IN_PIXELS,
3279                                        cm->byte_alignment, NULL, NULL, NULL))
3280             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
3281                                "Failed to allocate frame buffer");
3282           scale_and_extend_frame(ref, &new_fb_ptr->buf, (int)cm->bit_depth,
3283                                  EIGHTTAP, 0);
3284           cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
3285           alloc_frame_mvs(cm, new_fb);
3286         }
3287 #else
3288       if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
3289         RefCntBuffer *new_fb_ptr = NULL;
3290         int force_scaling = 0;
3291         int new_fb = cpi->scaled_ref_idx[ref_frame - 1];
3292         if (new_fb == INVALID_IDX) {
3293           new_fb = get_free_fb(cm);
3294           force_scaling = 1;
3295         }
3296         if (new_fb == INVALID_IDX) return;
3297         new_fb_ptr = &pool->frame_bufs[new_fb];
3298         if (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width ||
3299             new_fb_ptr->buf.y_crop_height != cm->height) {
3300           if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height,
3301                                        cm->subsampling_x, cm->subsampling_y,
3302                                        VP9_ENC_BORDER_IN_PIXELS,
3303                                        cm->byte_alignment, NULL, NULL, NULL))
3304             vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
3305                                "Failed to allocate frame buffer");
3306           vp9_scale_and_extend_frame(ref, &new_fb_ptr->buf, EIGHTTAP, 0);
3307           cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
3308           alloc_frame_mvs(cm, new_fb);
3309         }
3310 #endif  // CONFIG_VP9_HIGHBITDEPTH
3311       } else {
3312         int buf_idx;
3313         RefCntBuffer *buf = NULL;
3314         if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
3315           // Check for release of scaled reference.
3316           buf_idx = cpi->scaled_ref_idx[ref_frame - 1];
3317           if (buf_idx != INVALID_IDX) {
3318             buf = &pool->frame_bufs[buf_idx];
3319             --buf->ref_count;
3320             cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
3321           }
3322         }
3323         buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
3324         buf = &pool->frame_bufs[buf_idx];
3325         buf->buf.y_crop_width = ref->y_crop_width;
3326         buf->buf.y_crop_height = ref->y_crop_height;
3327         cpi->scaled_ref_idx[ref_frame - 1] = buf_idx;
3328         ++buf->ref_count;
3329       }
3330     } else {
3331       if (cpi->oxcf.pass != 0 || cpi->use_svc)
3332         cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
3333     }
3334   }
3335 }
3336
3337 static void release_scaled_references(VP9_COMP *cpi) {
3338   VP9_COMMON *cm = &cpi->common;
3339   int i;
3340   if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
3341     // Only release scaled references under certain conditions:
3342     // if reference will be updated, or if scaled reference has same resolution.
3343     int refresh[3];
3344     refresh[0] = (cpi->refresh_last_frame) ? 1 : 0;
3345     refresh[1] = (cpi->refresh_golden_frame) ? 1 : 0;
3346     refresh[2] = (cpi->refresh_alt_ref_frame) ? 1 : 0;
3347     for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3348       const int idx = cpi->scaled_ref_idx[i - 1];
3349       if (idx != INVALID_IDX) {
3350         RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx];
3351         const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, i);
3352         if (refresh[i - 1] || (buf->buf.y_crop_width == ref->y_crop_width &&
3353                                buf->buf.y_crop_height == ref->y_crop_height)) {
3354           --buf->ref_count;
3355           cpi->scaled_ref_idx[i - 1] = INVALID_IDX;
3356         }
3357       }
3358     }
3359   } else {
3360     for (i = 0; i < REFS_PER_FRAME; ++i) {
3361       const int idx = cpi->scaled_ref_idx[i];
3362       if (idx != INVALID_IDX) {
3363         RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx];
3364         --buf->ref_count;
3365         cpi->scaled_ref_idx[i] = INVALID_IDX;
3366       }
3367     }
3368   }
3369 }
3370
3371 static void full_to_model_count(unsigned int *model_count,
3372                                 unsigned int *full_count) {
3373   int n;
3374   model_count[ZERO_TOKEN] = full_count[ZERO_TOKEN];
3375   model_count[ONE_TOKEN] = full_count[ONE_TOKEN];
3376   model_count[TWO_TOKEN] = full_count[TWO_TOKEN];
3377   for (n = THREE_TOKEN; n < EOB_TOKEN; ++n)
3378     model_count[TWO_TOKEN] += full_count[n];
3379   model_count[EOB_MODEL_TOKEN] = full_count[EOB_TOKEN];
3380 }
3381
3382 static void full_to_model_counts(vp9_coeff_count_model *model_count,
3383                                  vp9_coeff_count *full_count) {
3384   int i, j, k, l;
3385
3386   for (i = 0; i < PLANE_TYPES; ++i)
3387     for (j = 0; j < REF_TYPES; ++j)
3388       for (k = 0; k < COEF_BANDS; ++k)
3389         for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l)
3390           full_to_model_count(model_count[i][j][k][l], full_count[i][j][k][l]);
3391 }
3392
#if 0 && CONFIG_INTERNAL_STATS
// Debug-only helper (compiled out by the "#if 0" above): appends one line of
// per-frame rate control statistics to "tmp.stt". The file is truncated and a
// header line is written when cm->current_video_frame is 0. A line is only
// emitted while twopass.total_left_stats.coded_error is non-zero.
// The output is best-effort: if a stats file cannot be opened the
// corresponding output is skipped instead of handing a NULL stream to
// fprintf()/fclose().
static void output_frame_level_debug_stats(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  FILE *const f = fopen("tmp.stt", cm->current_video_frame ? "a" : "w");
  int64_t recon_err;

  if (f == NULL) return;

  vpx_clear_system_state();

  // Luma SSE between the source and the reconstructed frame.
#if CONFIG_VP9_HIGHBITDEPTH
  if (cm->use_highbitdepth) {
    recon_err = vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
  } else {
    recon_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
  }
#else
  recon_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
#endif  // CONFIG_VP9_HIGHBITDEPTH


  if (cpi->twopass.total_left_stats.coded_error != 0.0) {
    // Divisor applied to the DC quantizer before printing; it depends on the
    // bit depth when high bit depth support is compiled in.
    double dc_quant_devisor;
#if CONFIG_VP9_HIGHBITDEPTH
    switch (cm->bit_depth) {
      case VPX_BITS_8:
        dc_quant_devisor = 4.0;
        break;
      case VPX_BITS_10:
        dc_quant_devisor = 16.0;
        break;
      default:
        assert(cm->bit_depth == VPX_BITS_12);
        dc_quant_devisor = 64.0;
        break;
    }
#else
    dc_quant_devisor = 4.0;
#endif

    // Column header, written once for the first frame.
    if (!cm->current_video_frame) {
      fprintf(f, "frame, width, height, last ts, last end ts, "
          "source_alt_ref_pending, source_alt_ref_active, "
          "this_frame_target, projected_frame_size, "
          "projected_frame_size / MBs, "
          "projected_frame_size - this_frame_target, "
          "vbr_bits_off_target, vbr_bits_off_target_fast, "
          "twopass.extend_minq, twopass.extend_minq_fast, "
          "total_target_vs_actual, "
          "starting_buffer_level - bits_off_target, "
          "total_actual_bits, base_qindex, q for base_qindex, "
          "dc quant, q for active_worst_quality, avg_q, q for oxcf.cq_level, "
          "refresh_last_frame, refresh_golden_frame, refresh_alt_ref_frame, "
          "frame_type, gfu_boost, "
          "twopass.bits_left, "
          "twopass.total_left_stats.coded_error, "
          "twopass.bits_left / (1 + twopass.total_left_stats.coded_error), "
          "tot_recode_hits, recon_err, kf_boost, "
          "twopass.kf_zeromotion_pct, twopass.fr_content_type, "
          "filter_level, seg.aq_av_offset\n");
    }

    fprintf(f, "%10u, %d, %d, %10"PRId64", %10"PRId64", %d, %d, %10d, %10d, "
        "%10d, %10d, %10"PRId64", %10"PRId64", %5d, %5d, %10"PRId64", "
        "%10"PRId64", %10"PRId64", %10d, %7.2lf, %7.2lf, %7.2lf, %7.2lf, "
        "%7.2lf, %6d, %6d, %5d, %5d, %5d, %10"PRId64", %10.3lf, %10lf, %8u, "
        "%10"PRId64", %10d, %10d, %10d, %10d, %10d\n",
        cpi->common.current_video_frame,
        cm->width, cm->height,
        cpi->last_time_stamp_seen,
        cpi->last_end_time_stamp_seen,
        cpi->rc.source_alt_ref_pending,
        cpi->rc.source_alt_ref_active,
        cpi->rc.this_frame_target,
        cpi->rc.projected_frame_size,
        cpi->rc.projected_frame_size / cpi->common.MBs,
        (cpi->rc.projected_frame_size - cpi->rc.this_frame_target),
        cpi->rc.vbr_bits_off_target,
        cpi->rc.vbr_bits_off_target_fast,
        cpi->twopass.extend_minq,
        cpi->twopass.extend_minq_fast,
        cpi->rc.total_target_vs_actual,
        (cpi->rc.starting_buffer_level - cpi->rc.bits_off_target),
        cpi->rc.total_actual_bits, cm->base_qindex,
        vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth),
        (double)vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) /
            dc_quant_devisor,
        vp9_convert_qindex_to_q(cpi->twopass.active_worst_quality,
                                cm->bit_depth),
        cpi->rc.avg_q,
        vp9_convert_qindex_to_q(cpi->oxcf.cq_level, cm->bit_depth),
        cpi->refresh_last_frame, cpi->refresh_golden_frame,
        cpi->refresh_alt_ref_frame, cm->frame_type, cpi->rc.gfu_boost,
        cpi->twopass.bits_left,
        cpi->twopass.total_left_stats.coded_error,
        cpi->twopass.bits_left /
            (1 + cpi->twopass.total_left_stats.coded_error),
        cpi->tot_recode_hits, recon_err, cpi->rc.kf_boost,
        cpi->twopass.kf_zeromotion_pct,
        cpi->twopass.fr_content_type,
        cm->lf.filter_level,
        cm->seg.aq_av_offset);
  }
  fclose(f);

  // Disabled per-mode histogram dump; flip the condition to enable it.
  if (0) {
    FILE *const fmodes = fopen("Modes.stt", "a");
    int i;

    if (fmodes == NULL) return;

    fprintf(fmodes, "%6d:%1d:%1d:%1d ", cpi->common.current_video_frame,
            cm->frame_type, cpi->refresh_golden_frame,
            cpi->refresh_alt_ref_frame);

    for (i = 0; i < MAX_MODES; ++i)
      fprintf(fmodes, "%5d ", cpi->mode_chosen_counts[i]);

    fprintf(fmodes, "\n");

    fclose(fmodes);
  }
}
#endif
3513
3514 static void set_mv_search_params(VP9_COMP *cpi) {
3515   const VP9_COMMON *const cm = &cpi->common;
3516   const unsigned int max_mv_def = VPXMIN(cm->width, cm->height);
3517
3518   // Default based on max resolution.
3519   cpi->mv_step_param = vp9_init_search_range(max_mv_def);
3520
3521   if (cpi->sf.mv.auto_mv_step_size) {
3522     if (frame_is_intra_only(cm)) {
3523       // Initialize max_mv_magnitude for use in the first INTER frame
3524       // after a key/intra-only frame.
3525       cpi->max_mv_magnitude = max_mv_def;
3526     } else {
3527       if (cm->show_frame) {
3528         // Allow mv_steps to correspond to twice the max mv magnitude found
3529         // in the previous frame, capped by the default max_mv_magnitude based
3530         // on resolution.
3531         cpi->mv_step_param = vp9_init_search_range(
3532             VPXMIN(max_mv_def, 2 * cpi->max_mv_magnitude));
3533       }
3534       cpi->max_mv_magnitude = 0;
3535     }
3536   }
3537 }
3538
3539 static void set_size_independent_vars(VP9_COMP *cpi) {
3540   vp9_set_speed_features_framesize_independent(cpi, cpi->oxcf.speed);
3541   vp9_set_rd_speed_thresholds(cpi);
3542   vp9_set_rd_speed_thresholds_sub8x8(cpi);
3543   cpi->common.interp_filter = cpi->sf.default_interp_filter;
3544 }
3545
3546 static void set_size_dependent_vars(VP9_COMP *cpi, int *q, int *bottom_index,
3547                                     int *top_index) {
3548   VP9_COMMON *const cm = &cpi->common;
3549
3550   // Setup variables that depend on the dimensions of the frame.
3551   vp9_set_speed_features_framesize_dependent(cpi, cpi->oxcf.speed);
3552
3553   // Decide q and q bounds.
3554   *q = vp9_rc_pick_q_and_bounds(cpi, bottom_index, top_index);
3555
3556   if (cpi->oxcf.rc_mode == VPX_CBR && cpi->rc.force_max_q) {
3557     *q = cpi->rc.worst_quality;
3558     cpi->rc.force_max_q = 0;
3559   }
3560
3561   if (!frame_is_intra_only(cm)) {
3562     vp9_set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH);
3563   }
3564
3565 #if !CONFIG_REALTIME_ONLY
3566   // Configure experimental use of segmentation for enhanced coding of
3567   // static regions if indicated.
3568   // Only allowed in the second pass of a two pass encode, as it requires
3569   // lagged coding, and if the relevant speed feature flag is set.
3570   if (cpi->oxcf.pass == 2 && cpi->sf.static_segmentation)
3571     configure_static_seg_features(cpi);
3572 #endif  // !CONFIG_REALTIME_ONLY
3573
3574 #if CONFIG_VP9_POSTPROC && !(CONFIG_VP9_TEMPORAL_DENOISING)
3575   if (cpi->oxcf.noise_sensitivity > 0) {
3576     int l = 0;
3577     switch (cpi->oxcf.noise_sensitivity) {
3578       case 1: l = 20; break;
3579       case 2: l = 40; break;
3580       case 3: l = 60; break;
3581       case 4:
3582       case 5: l = 100; break;
3583       case 6: l = 150; break;
3584     }
3585     if (!cpi->common.postproc_state.limits) {
3586       cpi->common.postproc_state.limits =
3587           vpx_calloc(cpi->un_scaled_source->y_width,
3588                      sizeof(*cpi->common.postproc_state.limits));
3589     }
3590     vp9_denoise(&cpi->common, cpi->Source, cpi->Source, l,
3591                 cpi->common.postproc_state.limits);
3592   }
3593 #endif  // CONFIG_VP9_POSTPROC
3594 }
3595
#if CONFIG_VP9_TEMPORAL_DENOISING
// Allocates the temporal denoiser's buffers for the current frame size when
// noise sensitivity is enabled and the denoiser has not been initialised yet.
// A non-zero result from vp9_denoiser_alloc() is reported as a memory error.
static void setup_denoiser_buffer(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;

  if (!(cpi->oxcf.noise_sensitivity > 0) ||
      cpi->denoiser.frame_buffer_initialized) {
    return;
  }

  if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc,
                         cpi->oxcf.noise_sensitivity, cm->width, cm->height,
                         cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                         cm->use_highbitdepth,
#endif
                         VP9_ENC_BORDER_IN_PIXELS)) {
    vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                       "Failed to allocate denoiser");
  }
}
#endif  // CONFIG_VP9_TEMPORAL_DENOISING
3613
3614 static void init_motion_estimation(VP9_COMP *cpi) {
3615   int y_stride = cpi->scaled_source.y_stride;
3616
3617   if (cpi->sf.mv.search_method == NSTEP) {
3618     vp9_init3smotion_compensation(&cpi->ss_cfg, y_stride);
3619   } else if (cpi->sf.mv.search_method == DIAMOND) {
3620     vp9_init_dsmotion_compensation(&cpi->ss_cfg, y_stride);
3621   }
3622 }
3623
3624 static void set_frame_size(VP9_COMP *cpi) {
3625   int ref_frame;
3626   VP9_COMMON *const cm = &cpi->common;
3627   VP9EncoderConfig *const oxcf = &cpi->oxcf;
3628   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
3629
3630 #if !CONFIG_REALTIME_ONLY
3631   if (oxcf->pass == 2 && oxcf->rc_mode == VPX_VBR &&
3632       ((oxcf->resize_mode == RESIZE_FIXED && cm->current_video_frame == 0) ||
3633        (oxcf->resize_mode == RESIZE_DYNAMIC && cpi->resize_pending))) {
3634     calculate_coded_size(cpi, &oxcf->scaled_frame_width,
3635                          &oxcf->scaled_frame_height);
3636
3637     // There has been a change in frame size.
3638     vp9_set_size_literal(cpi, oxcf->scaled_frame_width,
3639                          oxcf->scaled_frame_height);
3640   }
3641 #endif  // !CONFIG_REALTIME_ONLY
3642
3643   if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR && !cpi->use_svc &&
3644       oxcf->resize_mode == RESIZE_DYNAMIC && cpi->resize_pending != 0) {
3645     oxcf->scaled_frame_width =
3646         (oxcf->width * cpi->resize_scale_num) / cpi->resize_scale_den;
3647     oxcf->scaled_frame_height =
3648         (oxcf->height * cpi->resize_scale_num) / cpi->resize_scale_den;
3649     // There has been a change in frame size.
3650     vp9_set_size_literal(cpi, oxcf->scaled_frame_width,
3651                          oxcf->scaled_frame_height);
3652
3653     // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
3654     set_mv_search_params(cpi);
3655
3656     vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
3657 #if CONFIG_VP9_TEMPORAL_DENOISING
3658     // Reset the denoiser on the resized frame.
3659     if (cpi->oxcf.noise_sensitivity > 0) {
3660       vp9_denoiser_free(&(cpi->denoiser));
3661       setup_denoiser_buffer(cpi);
3662       // Dynamic resize is only triggered for non-SVC, so we can force
3663       // golden frame update here as temporary fix to denoiser.
3664       cpi->refresh_golden_frame = 1;
3665     }
3666 #endif
3667   }
3668
3669   if ((oxcf->pass == 2) && !cpi->use_svc) {
3670     vp9_set_target_rate(cpi);
3671   }
3672
3673   alloc_frame_mvs(cm, cm->new_fb_idx);
3674
3675   // Reset the frame pointers to the current frame size.
3676   if (vpx_realloc_frame_buffer(get_frame_new_buffer(cm), cm->width, cm->height,
3677                                cm->subsampling_x, cm->subsampling_y,
3678 #if CONFIG_VP9_HIGHBITDEPTH
3679                                cm->use_highbitdepth,
3680 #endif
3681                                VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
3682                                NULL, NULL, NULL))
3683     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
3684                        "Failed to allocate frame buffer");
3685
3686   alloc_util_frame_buffers(cpi);
3687   init_motion_estimation(cpi);
3688
3689   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3690     RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - 1];
3691     const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
3692
3693     ref_buf->idx = buf_idx;
3694
3695     if (buf_idx != INVALID_IDX) {
3696       YV12_BUFFER_CONFIG *const buf = &cm->buffer_pool->frame_bufs[buf_idx].buf;
3697       ref_buf->buf = buf;
3698 #if CONFIG_VP9_HIGHBITDEPTH
3699       vp9_setup_scale_factors_for_frame(
3700           &ref_buf->sf, buf->y_crop_width, buf->y_crop_height, cm->width,
3701           cm->height, (buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0);
3702 #else
3703       vp9_setup_scale_factors_for_frame(&ref_buf->sf, buf->y_crop_width,
3704                                         buf->y_crop_height, cm->width,
3705                                         cm->height);
3706 #endif  // CONFIG_VP9_HIGHBITDEPTH
3707       if (vp9_is_scaled(&ref_buf->sf)) vpx_extend_frame_borders(buf);
3708     } else {
3709       ref_buf->buf = NULL;
3710     }
3711   }
3712
3713   set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
3714 }
3715
#if CONFIG_CONSISTENT_RECODE
// Copies the current RD thresholds (prediction type and filter thresholds)
// and, when tile data exists, every tile's thresh_freq_fact table into their
// corresponding *_prev fields.
static void save_encode_params(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  RD_OPT *const rd_opt = &cpi->rd;
  int ref, k, tile_idx;

  for (ref = 0; ref < MAX_REF_FRAMES; ++ref) {
    for (k = 0; k < REFERENCE_MODES; ++k) {
      rd_opt->prediction_type_threshes_prev[ref][k] =
          rd_opt->prediction_type_threshes[ref][k];
    }

    for (k = 0; k < SWITCHABLE_FILTER_CONTEXTS; ++k) {
      rd_opt->filter_threshes_prev[ref][k] = rd_opt->filter_threshes[ref][k];
    }
  }

  if (cpi->tile_data == NULL) return;

  // Tiles are stored row-major, so a flat index visits them in the same
  // order as nested row/column loops.
  for (tile_idx = 0; tile_idx < tile_rows * tile_cols; ++tile_idx) {
    TileDataEnc *const tile_data = &cpi->tile_data[tile_idx];
    int bsize, mode;

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      for (mode = 0; mode < MAX_MODES; ++mode) {
        tile_data->thresh_freq_fact_prev[bsize][mode] =
            tile_data->thresh_freq_fact[bsize][mode];
      }
    }
  }
}
#endif  // CONFIG_CONSISTENT_RECODE
3748
3749 static INLINE void set_raw_source_frame(VP9_COMP *cpi) {
3750 #ifdef ENABLE_KF_DENOISE
3751   if (is_spatial_denoise_enabled(cpi)) {
3752     cpi->raw_source_frame = vp9_scale_if_required(
3753         cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
3754         (oxcf->pass == 0), EIGHTTAP, 0);
3755   } else {
3756     cpi->raw_source_frame = cpi->Source;
3757   }
3758 #else
3759   cpi->raw_source_frame = cpi->Source;
3760 #endif
3761 }
3762
3763 static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
3764                                       uint8_t *dest) {
3765   VP9_COMMON *const cm = &cpi->common;
3766   SVC *const svc = &cpi->svc;
3767   int q = 0, bottom_index = 0, top_index = 0;
3768   int no_drop_scene_change = 0;
3769   const INTERP_FILTER filter_scaler =
3770       (is_one_pass_cbr_svc(cpi))
3771           ? svc->downsample_filter_type[svc->spatial_layer_id]
3772           : EIGHTTAP;
3773   const int phase_scaler =
3774       (is_one_pass_cbr_svc(cpi))
3775           ? svc->downsample_filter_phase[svc->spatial_layer_id]
3776           : 0;
3777
3778   if (cm->show_existing_frame) {
3779     cpi->rc.this_frame_target = 0;
3780     if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi);
3781     return 1;
3782   }
3783
3784   svc->time_stamp_prev[svc->spatial_layer_id] = svc->time_stamp_superframe;
3785
3786   // Flag to check if its valid to compute the source sad (used for
3787   // scene detection and for superblock content state in CBR mode).
3788   // The flag may get reset below based on SVC or resizing state.
3789   cpi->compute_source_sad_onepass = cpi->oxcf.mode == REALTIME;
3790
3791   vpx_clear_system_state();
3792
3793   set_frame_size(cpi);
3794
3795   if (is_one_pass_cbr_svc(cpi) &&
3796       cpi->un_scaled_source->y_width == cm->width << 2 &&
3797       cpi->un_scaled_source->y_height == cm->height << 2 &&
3798       svc->scaled_temp.y_width == cm->width << 1 &&
3799       svc->scaled_temp.y_height == cm->height << 1) {
3800     // For svc, if it is a 1/4x1/4 downscaling, do a two-stage scaling to take
3801     // advantage of the 1:2 optimized scaler. In the process, the 1/2x1/2
3802     // result will be saved in scaled_temp and might be used later.
3803     const INTERP_FILTER filter_scaler2 = svc->downsample_filter_type[1];
3804     const int phase_scaler2 = svc->downsample_filter_phase[1];
3805     cpi->Source = vp9_svc_twostage_scale(
3806         cm, cpi->un_scaled_source, &cpi->scaled_source, &svc->scaled_temp,
3807         filter_scaler, phase_scaler, filter_scaler2, phase_scaler2);
3808     svc->scaled_one_half = 1;
3809   } else if (is_one_pass_cbr_svc(cpi) &&
3810              cpi->un_scaled_source->y_width == cm->width << 1 &&
3811              cpi->un_scaled_source->y_height == cm->height << 1 &&
3812              svc->scaled_one_half) {
3813     // If the spatial layer is 1/2x1/2 and the scaling is already done in the
3814     // two-stage scaling, use the result directly.
3815     cpi->Source = &svc->scaled_temp;
3816     svc->scaled_one_half = 0;
3817   } else {
3818     cpi->Source = vp9_scale_if_required(
3819         cm, cpi->un_scaled_source, &cpi->scaled_source, (cpi->oxcf.pass == 0),
3820         filter_scaler, phase_scaler);
3821   }
3822 #ifdef OUTPUT_YUV_SVC_SRC
3823   // Write out at most 3 spatial layers.
3824   if (is_one_pass_cbr_svc(cpi) && svc->spatial_layer_id < 3) {
3825     vpx_write_yuv_frame(yuv_svc_src[svc->spatial_layer_id], cpi->Source);
3826   }
3827 #endif
3828   // Unfiltered raw source used in metrics calculation if the source
3829   // has been filtered.
3830   if (is_psnr_calc_enabled(cpi)) {
3831 #ifdef ENABLE_KF_DENOISE
3832     if (is_spatial_denoise_enabled(cpi)) {
3833       cpi->raw_source_frame = vp9_scale_if_required(
3834           cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
3835           (cpi->oxcf.pass == 0), EIGHTTAP, phase_scaler);
3836     } else {
3837       cpi->raw_source_frame = cpi->Source;
3838     }
3839 #else
3840     cpi->raw_source_frame = cpi->Source;
3841 #endif
3842   }
3843
3844   if ((cpi->use_svc &&
3845        (svc->spatial_layer_id < svc->number_spatial_layers - 1 ||
3846         svc->temporal_layer_id < svc->number_temporal_layers - 1 ||
3847         svc->current_superframe < 1)) ||
3848       cpi->resize_pending || cpi->resize_state || cpi->external_resize ||
3849       cpi->resize_state != ORIG) {
3850     cpi->compute_source_sad_onepass = 0;
3851     if (cpi->content_state_sb_fd != NULL)
3852       memset(cpi->content_state_sb_fd, 0,
3853              (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) *
3854                  sizeof(*cpi->content_state_sb_fd));
3855   }
3856
3857   // Avoid scaling last_source unless its needed.
3858   // Last source is needed if avg_source_sad() is used, or if
3859   // partition_search_type == SOURCE_VAR_BASED_PARTITION, or if noise
3860   // estimation is enabled.
3861   if (cpi->unscaled_last_source != NULL &&
3862       (cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
3863        (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_VBR &&
3864         cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5) ||
3865        cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION ||
3866        (cpi->noise_estimate.enabled && !cpi->oxcf.noise_sensitivity) ||
3867        cpi->compute_source_sad_onepass))
3868     cpi->Last_Source = vp9_scale_if_required(
3869         cm, cpi->unscaled_last_source, &cpi->scaled_last_source,
3870         (cpi->oxcf.pass == 0), EIGHTTAP, 0);
3871
3872   if (cpi->Last_Source == NULL ||
3873       cpi->Last_Source->y_width != cpi->Source->y_width ||
3874       cpi->Last_Source->y_height != cpi->Source->y_height)
3875     cpi->compute_source_sad_onepass = 0;
3876
3877   if (frame_is_intra_only(cm) || cpi->resize_pending != 0) {
3878     memset(cpi->consec_zero_mv, 0,
3879            cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
3880   }
3881
3882 #if CONFIG_VP9_TEMPORAL_DENOISING
3883   if (cpi->oxcf.noise_sensitivity > 0 && cpi->use_svc)
3884     vp9_denoiser_reset_on_first_frame(cpi);
3885 #endif
3886
3887   // Scene detection is always used for VBR mode or screen-content case.
3888   // For other cases (e.g., CBR mode) use it for 5 <= speed < 8 for now
3889   // (need to check encoding time cost for doing this for speed 8).
3890   cpi->rc.high_source_sad = 0;
3891   cpi->rc.hybrid_intra_scene_change = 0;
3892   cpi->rc.re_encode_maxq_scene_change = 0;
3893   if (cm->show_frame && cpi->oxcf.mode == REALTIME &&
3894       (cpi->oxcf.rc_mode == VPX_VBR ||
3895        cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
3896        (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8)))
3897     vp9_scene_detection_onepass(cpi);
3898
3899   if (svc->spatial_layer_id == svc->first_spatial_layer_to_encode) {
3900     svc->high_source_sad_superframe = cpi->rc.high_source_sad;
3901     svc->high_num_blocks_with_motion = cpi->rc.high_num_blocks_with_motion;
3902     // On scene change reset temporal layer pattern to TL0.
3903     // Note that if the base/lower spatial layers are skipped: instead of
3904     // inserting base layer here, we force max-q for the next superframe
3905     // with lower spatial layers: this is done in vp9_encodedframe_overshoot()
3906     // when max-q is decided for the current layer.
3907     // Only do this reset for bypass/flexible mode.
3908     if (svc->high_source_sad_superframe && svc->temporal_layer_id > 0 &&
3909         svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
3910       // rc->high_source_sad will get reset so copy it to restore it.
3911       int tmp_high_source_sad = cpi->rc.high_source_sad;
3912       vp9_svc_reset_temporal_layers(cpi, cm->frame_type == KEY_FRAME);
3913       cpi->rc.high_source_sad = tmp_high_source_sad;
3914     }
3915   }
3916
3917   vp9_update_noise_estimate(cpi);
3918
3919   // For 1 pass CBR, check if we are dropping this frame.
3920   // Never drop on key frame, if base layer is key for svc,
3921   // on scene change, or if superframe has layer sync.
3922   if ((cpi->rc.high_source_sad || svc->high_source_sad_superframe) &&
3923       !(cpi->rc.use_post_encode_drop && svc->last_layer_dropped[0]))
3924     no_drop_scene_change = 1;
3925   if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
3926       !frame_is_intra_only(cm) && !no_drop_scene_change &&
3927       !svc->superframe_has_layer_sync &&
3928       (!cpi->use_svc ||
3929        !svc->layer_context[svc->temporal_layer_id].is_key_frame)) {
3930     if (vp9_rc_drop_frame(cpi)) return 0;
3931   }
3932
3933   // For 1 pass CBR SVC, only ZEROMV is allowed for spatial reference frame
3934   // when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can
3935   // avoid this frame-level upsampling (for non intra_only frames).
3936   if (frame_is_intra_only(cm) == 0 &&
3937       !(is_one_pass_cbr_svc(cpi) && svc->force_zero_mode_spatial_ref)) {
3938     vp9_scale_references(cpi);
3939   }
3940
3941   set_size_independent_vars(cpi);
3942   set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
3943
3944   // search method and step parameter might be changed in speed settings.
3945   init_motion_estimation(cpi);
3946
3947   if (cpi->sf.copy_partition_flag) alloc_copy_partition_data(cpi);
3948
3949   if (cpi->sf.svc_use_lowres_part &&
3950       svc->spatial_layer_id == svc->number_spatial_layers - 2) {
3951     if (svc->prev_partition_svc == NULL) {
3952       CHECK_MEM_ERROR(
3953           cm, svc->prev_partition_svc,
3954           (BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows,
3955                                    sizeof(*svc->prev_partition_svc)));
3956     }
3957   }
3958
3959   // TODO(jianj): Look into issue of skin detection with high bitdepth.
3960   if (cm->bit_depth == 8 && cpi->oxcf.speed >= 5 && cpi->oxcf.pass == 0 &&
3961       cpi->oxcf.rc_mode == VPX_CBR &&
3962       cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
3963       cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
3964     cpi->use_skin_detection = 1;
3965   }
3966
3967   // Enable post encode frame dropping for CBR on non key frame, when
3968   // ext_use_post_encode_drop is specified by user.
3969   cpi->rc.use_post_encode_drop = cpi->rc.ext_use_post_encode_drop &&
3970                                  cpi->oxcf.rc_mode == VPX_CBR &&
3971                                  cm->frame_type != KEY_FRAME;
3972
3973   vp9_set_quantizer(cm, q);
3974   vp9_set_variance_partition_thresholds(cpi, q, 0);
3975
3976   setup_frame(cpi);
3977
3978   suppress_active_map(cpi);
3979
3980   if (cpi->use_svc) {
3981     // On non-zero spatial layer, check for disabling inter-layer
3982     // prediction.
3983     if (svc->spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi);
3984     vp9_svc_assert_constraints_pattern(cpi);
3985   }
3986
3987   if (cpi->rc.last_post_encode_dropped_scene_change) {
3988     cpi->rc.high_source_sad = 1;
3989     svc->high_source_sad_superframe = 1;
3990     // For now disable use_source_sad since Last_Source will not be the previous
3991     // encoded but the dropped one.
3992     cpi->sf.use_source_sad = 0;
3993     cpi->rc.last_post_encode_dropped_scene_change = 0;
3994   }
3995   // Check if this high_source_sad (scene/slide change) frame should be
3996   // encoded at high/max QP, and if so, set the q and adjust some rate
3997   // control parameters.
3998   if (cpi->sf.overshoot_detection_cbr_rt == FAST_DETECTION_MAXQ &&
3999       (cpi->rc.high_source_sad ||
4000        (cpi->use_svc && svc->high_source_sad_superframe))) {
4001     if (vp9_encodedframe_overshoot(cpi, -1, &q)) {
4002       vp9_set_quantizer(cm, q);
4003       vp9_set_variance_partition_thresholds(cpi, q, 0);
4004     }
4005   }
4006
4007 #if !CONFIG_REALTIME_ONLY
4008   // Variance adaptive and in frame q adjustment experiments are mutually
4009   // exclusive.
4010   if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
4011     vp9_vaq_frame_setup(cpi);
4012   } else if (cpi->oxcf.aq_mode == EQUATOR360_AQ) {
4013     vp9_360aq_frame_setup(cpi);
4014   } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
4015     vp9_setup_in_frame_q_adj(cpi);
4016   } else if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ) {
4017     // it may be pretty bad for rate-control,
4018     // and I should handle it somehow
4019     vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi);
4020   } else {
4021 #endif
4022     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
4023       vp9_cyclic_refresh_setup(cpi);
4024     } else if (cpi->roi.enabled && !frame_is_intra_only(cm)) {
4025       apply_roi_map(cpi);
4026     }
4027 #if !CONFIG_REALTIME_ONLY
4028   }
4029 #endif
4030
4031   apply_active_map(cpi);
4032
4033   vp9_encode_frame(cpi);
4034
4035   // Check if we should re-encode this frame at high Q because of high
4036   // overshoot based on the encoded frame size. Only for frames where
4037   // high temporal-source SAD is detected.
4038   // For SVC: all spatial layers are checked for re-encoding.
4039   if (cpi->sf.overshoot_detection_cbr_rt == RE_ENCODE_MAXQ &&
4040       (cpi->rc.high_source_sad ||
4041        (cpi->use_svc && svc->high_source_sad_superframe))) {
4042     int frame_size = 0;
4043     // Get an estimate of the encoded frame size.
4044     save_coding_context(cpi);
4045     vp9_pack_bitstream(cpi, dest, size);
4046     restore_coding_context(cpi);
4047     frame_size = (int)(*size) << 3;
4048     // Check if encoded frame will overshoot too much, and if so, set the q and
4049     // adjust some rate control parameters, and return to re-encode the frame.
4050     if (vp9_encodedframe_overshoot(cpi, frame_size, &q)) {
4051       vpx_clear_system_state();
4052       vp9_set_quantizer(cm, q);
4053       vp9_set_variance_partition_thresholds(cpi, q, 0);
4054       suppress_active_map(cpi);
4055       // Turn-off cyclic refresh for re-encoded frame.
4056       if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
4057         CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
4058         unsigned char *const seg_map = cpi->segmentation_map;
4059         memset(seg_map, 0, cm->mi_rows * cm->mi_cols);
4060         memset(cr->last_coded_q_map, MAXQ,
4061                cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
4062         cr->sb_index = 0;
4063         vp9_disable_segmentation(&cm->seg);
4064       }
4065       apply_active_map(cpi);
4066       vp9_encode_frame(cpi);
4067     }
4068   }
4069
4070   // Update some stats from cyclic refresh, and check for golden frame update.
4071   if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
4072       !frame_is_intra_only(cm))
4073     vp9_cyclic_refresh_postencode(cpi);
4074
4075   // Update the skip mb flag probabilities based on the distribution
4076   // seen in the last encoder iteration.
4077   // update_base_skip_probs(cpi);
4078   vpx_clear_system_state();
4079   return 1;
4080 }
4081
4082 #if !CONFIG_REALTIME_ONLY
#define MAX_QSTEP_ADJ 4
// Returns the q-index step to apply after a rate miss: the ratio
// rate_excess / rate_limit rounded to nearest, capped at MAX_QSTEP_ADJ.
// A zero rate_limit yields the cap.
static int get_qstep_adj(int rate_excess, int rate_limit) {
  int64_t qstep;

  if (rate_limit == 0) return MAX_QSTEP_ADJ;

  // Widen before adding the rounding term: rate_excess + rate_limit / 2 can
  // exceed INT_MAX for very large sizes, and signed overflow is undefined.
  qstep = ((int64_t)rate_excess + rate_limit / 2) / rate_limit;
  return (int)(qstep < MAX_QSTEP_ADJ ? qstep : MAX_QSTEP_ADJ);
}
4089
     // Encodes the current frame, re-encoding with an adjusted quantizer index
     // (q) until the projected frame size is acceptable or q stops changing.
     //
     // cpi:  encoder instance; rate control and coding state are updated in place.
     // size: handed to vp9_pack_bitstream() for the trial pack; *size is read back
     //       and shifted left by 3 to form rc->projected_frame_size.
     // dest: output buffer handed to vp9_pack_bitstream() for the trial pack.
4090 static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
4091                                     uint8_t *dest) {
4092   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
4093   VP9_COMMON *const cm = &cpi->common;
4094   RATE_CONTROL *const rc = &cpi->rc;
4095   int bottom_index, top_index;
4096   int loop_count = 0;
4097   int loop_at_this_size = 0;
4098   int loop = 0;
4099   int overshoot_seen = 0;
4100   int undershoot_seen = 0;
       // Both limits are assigned on the first loop pass (loop_count == 0).
4101   int frame_over_shoot_limit;
4102   int frame_under_shoot_limit;
4103   int q = 0, q_low = 0, q_high = 0;
4104   int enable_acl;
4105 #ifdef AGGRESSIVE_VBR
4106   int qrange_adj = 1;
4107 #endif
4108
       // Nothing is encoded when an existing frame is shown: zero the target and
       // only refresh the raw source pointer if PSNR is being computed.
4109   if (cm->show_existing_frame) {
4110     rc->this_frame_target = 0;
4111     if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi);
4112     return;
4113   }
4114
4115   set_size_independent_vars(cpi);
4116
       // The extra encode at the end of this function is considered only when the
       // speed feature allows it and the frame is a key frame or the GF group
       // index is 1.
4117   enable_acl = cpi->sf.allow_acl ? (cm->frame_type == KEY_FRAME) ||
4118                                        (cpi->twopass.gf_group.index == 1)
4119                                  : 0;
4120
       // Recode loop: each pass encodes the frame at q, then decides whether
       // another pass with a different q (or frame size) is required.
4121   do {
4122     vpx_clear_system_state();
4123
4124     set_frame_size(cpi);
4125
         // (Re)initialise the q search range on the first pass and whenever a
         // resize is pending.
4126     if (loop_count == 0 || cpi->resize_pending != 0) {
4127       set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
4128
4129 #ifdef AGGRESSIVE_VBR
4130       if (two_pass_first_group_inter(cpi)) {
4131         // Adjustment limits for min and max q
4132         qrange_adj = VPXMAX(1, (top_index - bottom_index) / 2);
4133
4134         bottom_index =
4135             VPXMAX(bottom_index - qrange_adj / 2, oxcf->best_allowed_q);
4136         top_index = VPXMIN(oxcf->worst_allowed_q, top_index + qrange_adj / 2);
4137       }
4138 #endif
4139       // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
4140       set_mv_search_params(cpi);
4141
4142       // Reset the loop state for new frame size.
4143       overshoot_seen = 0;
4144       undershoot_seen = 0;
4145
4146       // Reconfiguration for change in frame size has concluded.
4147       cpi->resize_pending = 0;
4148
4149       q_low = bottom_index;
4150       q_high = top_index;
4151
4152       loop_at_this_size = 0;
4153     }
4154
4155     // Decide frame size bounds first time through.
4156     if (loop_count == 0) {
4157       vp9_rc_compute_frame_size_bounds(cpi, rc->this_frame_target,
4158                                        &frame_under_shoot_limit,
4159                                        &frame_over_shoot_limit);
4160     }
4161
4162     cpi->Source =
4163         vp9_scale_if_required(cm, cpi->un_scaled_source, &cpi->scaled_source,
4164                               (oxcf->pass == 0), EIGHTTAP, 0);
4165
4166     // Unfiltered raw source used in metrics calculation if the source
4167     // has been filtered.
4168     if (is_psnr_calc_enabled(cpi)) {
4169 #ifdef ENABLE_KF_DENOISE
4170       if (is_spatial_denoise_enabled(cpi)) {
4171         cpi->raw_source_frame = vp9_scale_if_required(
4172             cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
4173             (oxcf->pass == 0), EIGHTTAP, 0);
4174       } else {
4175         cpi->raw_source_frame = cpi->Source;
4176       }
4177 #else
4178       cpi->raw_source_frame = cpi->Source;
4179 #endif
4180     }
4181
4182     if (cpi->unscaled_last_source != NULL)
4183       cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source,
4184                                                &cpi->scaled_last_source,
4185                                                (oxcf->pass == 0), EIGHTTAP, 0);
4186
         // Inter frames need scaled references; on repeat passes the references
         // from the previous pass are released before scaling again.
4187     if (frame_is_intra_only(cm) == 0) {
4188       if (loop_count > 0) {
4189         release_scaled_references(cpi);
4190       }
4191       vp9_scale_references(cpi);
4192     }
4193
4194     vp9_set_quantizer(cm, q);
4195
4196     if (loop_count == 0) setup_frame(cpi);
4197
4198     // Variance adaptive and in frame q adjustment experiments are mutually
4199     // exclusive.
4200     if (oxcf->aq_mode == VARIANCE_AQ) {
4201       vp9_vaq_frame_setup(cpi);
4202     } else if (oxcf->aq_mode == EQUATOR360_AQ) {
4203       vp9_360aq_frame_setup(cpi);
4204     } else if (oxcf->aq_mode == COMPLEXITY_AQ) {
4205       vp9_setup_in_frame_q_adj(cpi);
4206     } else if (oxcf->aq_mode == LOOKAHEAD_AQ) {
4207       vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi);
4208     } else if (oxcf->aq_mode == PSNR_AQ) {
4209       vp9_psnr_aq_mode_setup(&cm->seg);
4210     }
4211
4212     vp9_encode_frame(cpi);
4213
4214     // Update the skip mb flag probabilities based on the distribution
4215     // seen in the last encoder iteration.
4216     // update_base_skip_probs(cpi);
4217
4218     vpx_clear_system_state();
4219
4220     // Dummy pack of the bitstream using up to date stats to get an
4221     // accurate estimate of output frame size to determine if we need
4222     // to recode.
4223     if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
4224       save_coding_context(cpi);
           // NOTE(review): when use_nonrd_pick_mode is set the pack is skipped,
           // yet *size is still read below - confirm callers initialise *size.
4225       if (!cpi->sf.use_nonrd_pick_mode) vp9_pack_bitstream(cpi, dest, size);
4226
4227       rc->projected_frame_size = (int)(*size) << 3;
4228
4229       if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1;
4230     }
4231
         // VPX_Q mode never recodes.
4232     if (oxcf->rc_mode == VPX_Q) {
4233       loop = 0;
4234     } else {
           // Forced key frame that fits within the max frame bandwidth: steer q
           // by luma SSE against cpi->ambient_err instead of by size alone.
4235       if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced &&
4236           (rc->projected_frame_size < rc->max_frame_bandwidth)) {
4237         int last_q = q;
4238         int64_t kf_err;
4239
4240         int64_t high_err_target = cpi->ambient_err;
4241         int64_t low_err_target = cpi->ambient_err >> 1;
4242
4243 #if CONFIG_VP9_HIGHBITDEPTH
4244         if (cm->use_highbitdepth) {
4245           kf_err = vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
4246         } else {
4247           kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
4248         }
4249 #else
4250         kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
4251 #endif  // CONFIG_VP9_HIGHBITDEPTH
4252
4253         // Prevent possible divide by zero error below for perfect KF
4254         kf_err += !kf_err;
4255
4256         // The key frame is not good enough or we can afford
4257         // to make it better without undue risk of popping.
4258         if ((kf_err > high_err_target &&
4259              rc->projected_frame_size <= frame_over_shoot_limit) ||
4260             (kf_err > low_err_target &&
4261              rc->projected_frame_size <= frame_under_shoot_limit)) {
4262           // Lower q_high
4263           q_high = q > q_low ? q - 1 : q_low;
4264
4265           // Adjust Q
4266           q = (int)((q * high_err_target) / kf_err);
4267           q = VPXMIN(q, (q_high + q_low) >> 1);
4268         } else if (kf_err < low_err_target &&
4269                    rc->projected_frame_size >= frame_under_shoot_limit) {
4270           // The key frame is much better than the previous frame
4271           // Raise q_low
4272           q_low = q < q_high ? q + 1 : q_high;
4273
4274           // Adjust Q
4275           q = (int)((q * low_err_target) / kf_err);
4276           q = VPXMIN(q, (q_high + q_low + 1) >> 1);
4277         }
4278
4279         // Clamp Q to upper and lower limits:
4280         q = clamp(q, q_low, q_high);
4281
             // Go around again only if q actually changed.
4282         loop = q != last_q;
4283       } else if (recode_loop_test(cpi, frame_over_shoot_limit,
4284                                   frame_under_shoot_limit, q,
4285                                   VPXMAX(q_high, top_index), bottom_index)) {
4286         // Is the projected frame size out of range and are we allowed
4287         // to attempt to recode.
4288         int last_q = q;
4289         int retries = 0;
4290         int qstep;
4291
4292         if (cpi->resize_pending == 1) {
4293           // Change in frame size so go back around the recode loop.
4294           cpi->rc.frame_size_selector =
4295               SCALE_STEP1 - cpi->rc.frame_size_selector;
4296           cpi->rc.next_frame_size_selector = cpi->rc.frame_size_selector;
4297
4298 #if CONFIG_INTERNAL_STATS
4299           ++cpi->tot_recode_hits;
4300 #endif
4301           ++loop_count;
4302           loop = 1;
               // `continue` jumps straight to the while (loop) test, so the
               // loop bookkeeping and restore_coding_context() at the bottom of
               // the loop body are skipped on this path.
4303           continue;
4304         }
4305
4306         // Frame size out of permitted range:
4307         // Update correction factor & compute new Q to try...
4308
4309         // Frame is too large
4310         if (rc->projected_frame_size > rc->this_frame_target) {
4311           // Special case if the projected size is > the max allowed.
4312           if ((q == q_high) &&
4313               ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
4314                (!rc->is_src_frame_alt_ref &&
4315                 (rc->projected_frame_size >=
4316                  big_rate_miss_high_threshold(cpi))))) {
4317             int max_rate = VPXMAX(1, VPXMIN(rc->max_frame_bandwidth,
4318                                             big_rate_miss_high_threshold(cpi)));
4319             double q_val_high;
4320             q_val_high = vp9_convert_qindex_to_q(q_high, cm->bit_depth);
4321             q_val_high =
4322                 q_val_high * ((double)rc->projected_frame_size / max_rate);
4323             q_high = vp9_convert_q_to_qindex(q_val_high, cm->bit_depth);
4324             q_high = clamp(q_high, rc->best_quality, rc->worst_quality);
4325           }
4326
4327           // Raise q_low to q + qstep, but never above q_high.
4328           qstep =
4329               get_qstep_adj(rc->projected_frame_size, rc->this_frame_target);
4330           q_low = VPXMIN(q + qstep, q_high);
4331
4332           if (undershoot_seen || loop_at_this_size > 1) {
4333             // Update rate_correction_factor, then bisect (rounding up)
                 // between q_low and q_high.
4334             vp9_rc_update_rate_correction_factors(cpi);
4335
4336             q = (q_high + q_low + 1) / 2;
4337           } else {
4338             // Update rate_correction_factor, then ask rate control for a
                 // new q; retry at most 10 times while q is still below q_low.
4339             vp9_rc_update_rate_correction_factors(cpi);
4340
4341             q = vp9_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
4342                                   VPXMAX(q_high, top_index));
4343
4344             while (q < q_low && retries < 10) {
4345               vp9_rc_update_rate_correction_factors(cpi);
4346               q = vp9_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
4347                                     VPXMAX(q_high, top_index));
4348               retries++;
4349             }
4350           }
4351
4352           overshoot_seen = 1;
4353         } else {
4354           // Frame is too small
               // Lower q_high to q - qstep, but never below q_low.
4355           qstep =
4356               get_qstep_adj(rc->this_frame_target, rc->projected_frame_size);
4357           q_high = VPXMAX(q - qstep, q_low);
4358
4359           if (overshoot_seen || loop_at_this_size > 1) {
                 // Bisect (rounding down) between q_low and q_high.
4360             vp9_rc_update_rate_correction_factors(cpi);
4361             q = (q_high + q_low) / 2;
4362           } else {
4363             vp9_rc_update_rate_correction_factors(cpi);
4364             q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
4365                                   VPXMIN(q_low, bottom_index), top_index);
4366             // Special case reset for qlow for constrained quality.
4367             // This should only trigger where there is very substantial
4368             // undershoot on a frame and the auto cq level is above
4369             // the user passed in value.
4370             if (oxcf->rc_mode == VPX_CQ && q < q_low) {
4371               q_low = q;
4372             }
4373
4374             while (q > q_high && retries < 10) {
4375               vp9_rc_update_rate_correction_factors(cpi);
4376               q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
4377                                     VPXMIN(q_low, bottom_index), top_index);
4378               retries++;
4379             }
4380           }
4381           undershoot_seen = 1;
4382         }
4383
4384         // Clamp Q to upper and lower limits:
4385         q = clamp(q, q_low, q_high);
4386
4387         loop = (q != last_q);
4388       } else {
4389         loop = 0;
4390       }
4391     }
4392
4393     // Special case for overlay frame.
4394     if (rc->is_src_frame_alt_ref &&
4395         rc->projected_frame_size < rc->max_frame_bandwidth)
4396       loop = 0;
4397
4398     if (loop) {
4399       ++loop_count;
4400       ++loop_at_this_size;
4401
4402 #if CONFIG_INTERNAL_STATS
4403       ++cpi->tot_recode_hits;
4404 #endif
4405     }
4406
         // Undo the save_coding_context() above. On the final pass with
         // enable_acl set the context is left as encoded so it can be compared
         // and restored after the loop.
4407     if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF)
4408       if (loop || !enable_acl) restore_coding_context(cpi);
4409   } while (loop);
4410
4411 #ifdef AGGRESSIVE_VBR
4412   if (two_pass_first_group_inter(cpi)) {
4413     cpi->twopass.active_worst_quality =
4414         VPXMIN(q + qrange_adj, oxcf->worst_allowed_q);
4415   } else if (!frame_is_kf_gf_arf(cpi)) {
4416 #else
4417   if (!frame_is_kf_gf_arf(cpi)) {
4418 #endif
4419     // Have we been forced to adapt Q outside the expected range by an extreme
4420     // rate miss. If so adjust the active maxQ for the subsequent frames.
4421     if (!rc->is_src_frame_alt_ref && (q > cpi->twopass.active_worst_quality)) {
4422       cpi->twopass.active_worst_quality = q;
4423     } else if (oxcf->vbr_corpus_complexity && q == q_low &&
4424                rc->projected_frame_size < rc->this_frame_target) {
4425       cpi->twopass.active_worst_quality =
4426           VPXMAX(q, cpi->twopass.active_worst_quality - 1);
4427     }
4428   }
4429
       // NOTE(review): restore_coding_context() below is unconditional, while
       // save_coding_context() in the loop only runs when sf.recode_loop >=
       // ALLOW_RECODE_KFARFGF - confirm allow_acl implies that setting.
4430   if (enable_acl) {
4431     // Skip recoding, if model diff is below threshold
4432     const int thresh = compute_context_model_thresh(cpi);
4433     const int diff = compute_context_model_diff(cm);
4434     if (diff < thresh) {
4435       vpx_clear_system_state();
4436       restore_coding_context(cpi);
4437       return;
4438     }
4439
4440     vp9_encode_frame(cpi);
4441     vpx_clear_system_state();
4442     restore_coding_context(cpi);
4443   }
4444 }
4445 #endif  // !CONFIG_REALTIME_ONLY
4446
4447 static int get_ref_frame_flags(const VP9_COMP *cpi) {
4448   const int *const map = cpi->common.ref_frame_map;
4449   const int gold_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idx];
4450   const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idx];
4451   const int gold_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx];
4452   int flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
4453
4454   if (gold_is_last) flags &= ~VP9_GOLD_FLAG;
4455
4456   if (cpi->rc.frames_till_gf_update_due == INT_MAX &&
4457       (cpi->svc.number_temporal_layers == 1 &&
4458        cpi->svc.number_spatial_layers == 1))
4459     flags &= ~VP9_GOLD_FLAG;
4460
4461   if (alt_is_last) flags &= ~VP9_ALT_FLAG;
4462
4463   if (gold_is_alt) flags &= ~VP9_ALT_FLAG;
4464
4465   return flags;
4466 }
4467
4468 static void set_ext_overrides(VP9_COMP *cpi) {
4469   // Overrides the defaults with the externally supplied values with
4470   // vp9_update_reference() and vp9_update_entropy() calls
4471   // Note: The overrides are valid only for the next frame passed
4472   // to encode_frame_to_data_rate() function
4473   if (cpi->ext_refresh_frame_context_pending) {
4474     cpi->common.refresh_frame_context = cpi->ext_refresh_frame_context;
4475     cpi->ext_refresh_frame_context_pending = 0;
4476   }
4477   if (cpi->ext_refresh_frame_flags_pending) {
4478     cpi->refresh_last_frame = cpi->ext_refresh_last_frame;
4479     cpi->refresh_golden_frame = cpi->ext_refresh_golden_frame;
4480     cpi->refresh_alt_ref_frame = cpi->ext_refresh_alt_ref_frame;
4481   }
4482 }
4483
4484 YV12_BUFFER_CONFIG *vp9_svc_twostage_scale(
4485     VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled,
4486     YV12_BUFFER_CONFIG *scaled_temp, INTERP_FILTER filter_type,
4487     int phase_scaler, INTERP_FILTER filter_type2, int phase_scaler2) {
4488   if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
4489       cm->mi_rows * MI_SIZE != unscaled->y_height) {
4490 #if CONFIG_VP9_HIGHBITDEPTH
4491     if (cm->bit_depth == VPX_BITS_8) {
4492       vp9_scale_and_extend_frame(unscaled, scaled_temp, filter_type2,
4493                                  phase_scaler2);
4494       vp9_scale_and_extend_frame(scaled_temp, scaled, filter_type,
4495                                  phase_scaler);
4496     } else {
4497       scale_and_extend_frame(unscaled, scaled_temp, (int)cm->bit_depth,
4498                              filter_type2, phase_scaler2);
4499       scale_and_extend_frame(scaled_temp, scaled, (int)cm->bit_depth,
4500                              filter_type, phase_scaler);
4501     }
4502 #else
4503     vp9_scale_and_extend_frame(unscaled, scaled_temp, filter_type2,
4504                                phase_scaler2);
4505     vp9_scale_and_extend_frame(scaled_temp, scaled, filter_type, phase_scaler);
4506 #endif  // CONFIG_VP9_HIGHBITDEPTH
4507     return scaled;
4508   } else {
4509     return unscaled;
4510   }
4511 }
4512
4513 YV12_BUFFER_CONFIG *vp9_scale_if_required(
4514     VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled,
4515     int use_normative_scaler, INTERP_FILTER filter_type, int phase_scaler) {
4516   if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
4517       cm->mi_rows * MI_SIZE != unscaled->y_height) {
4518 #if CONFIG_VP9_HIGHBITDEPTH
4519     if (use_normative_scaler && unscaled->y_width <= (scaled->y_width << 1) &&
4520         unscaled->y_height <= (scaled->y_height << 1))
4521       if (cm->bit_depth == VPX_BITS_8)
4522         vp9_scale_and_extend_frame(unscaled, scaled, filter_type, phase_scaler);
4523       else
4524         scale_and_extend_frame(unscaled, scaled, (int)cm->bit_depth,
4525                                filter_type, phase_scaler);
4526     else
4527       scale_and_extend_frame_nonnormative(unscaled, scaled, (int)cm->bit_depth);
4528 #else
4529     if (use_normative_scaler && unscaled->y_width <= (scaled->y_width << 1) &&
4530         unscaled->y_height <= (scaled->y_height << 1))
4531       vp9_scale_and_extend_frame(unscaled, scaled, filter_type, phase_scaler);
4532     else
4533       scale_and_extend_frame_nonnormative(unscaled, scaled);
4534 #endif  // CONFIG_VP9_HIGHBITDEPTH
4535     return scaled;
4536   } else {
4537     return unscaled;
4538   }
4539 }
4540
4541 static void set_ref_sign_bias(VP9_COMP *cpi) {
4542   VP9_COMMON *const cm = &cpi->common;
4543   RefCntBuffer *const ref_buffer = get_ref_cnt_buffer(cm, cm->new_fb_idx);
4544   const int cur_frame_index = ref_buffer->frame_index;
4545   MV_REFERENCE_FRAME ref_frame;
4546
4547   for (ref_frame = LAST_FRAME; ref_frame < MAX_REF_FRAMES; ++ref_frame) {
4548     const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
4549     const RefCntBuffer *const ref_cnt_buf =
4550         get_ref_cnt_buffer(&cpi->common, buf_idx);
4551     if (ref_cnt_buf) {
4552       cm->ref_frame_sign_bias[ref_frame] =
4553           cur_frame_index < ref_cnt_buf->frame_index;
4554     }
4555   }
4556 }
4557
4558 static int setup_interp_filter_search_mask(VP9_COMP *cpi) {
4559   INTERP_FILTER ifilter;
4560   int ref_total[MAX_REF_FRAMES] = { 0 };
4561   MV_REFERENCE_FRAME ref;
4562   int mask = 0;
4563   if (cpi->common.last_frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame)
4564     return mask;
4565   for (ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref)
4566     for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter)
4567       ref_total[ref] += cpi->interp_filter_selected[ref][ifilter];
4568
4569   for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter) {
4570     if ((ref_total[LAST_FRAME] &&
4571          cpi->interp_filter_selected[LAST_FRAME][ifilter] == 0) &&
4572         (ref_total[GOLDEN_FRAME] == 0 ||
4573          cpi->interp_filter_selected[GOLDEN_FRAME][ifilter] * 50 <
4574              ref_total[GOLDEN_FRAME]) &&
4575         (ref_total[ALTREF_FRAME] == 0 ||
4576          cpi->interp_filter_selected[ALTREF_FRAME][ifilter] * 50 <
4577              ref_total[ALTREF_FRAME]))
4578       mask |= 1 << ifilter;
4579   }
4580   return mask;
4581 }
4582
4583 #ifdef ENABLE_KF_DENOISE
4584 // Baseline kernel weights for the spatial denoise filter. Both tables are
     // stored row by row; spatial_denoise_point() walks them with a pointer that
     // advances once per column inside each row.
     // 3x3 kernel: centre weight 4, edge neighbours 2, corners 1.
4585 static uint8_t dn_kernal_3[9] = { 1, 2, 1, 2, 4, 2, 1, 2, 1 };
     // 5x5 kernel: centre weight 4, its four edge neighbours 2, all others 1.
4586 static uint8_t dn_kernal_5[25] = { 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 4,
4587                                    2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1 };
4588
4589 static INLINE void add_denoise_point(int centre_val, int data_val, int thresh,
4590                                      uint8_t point_weight, int *sum_val,
4591                                      int *sum_weight) {
4592   if (abs(centre_val - data_val) <= thresh) {
4593     *sum_weight += point_weight;
4594     *sum_val += (int)data_val * (int)point_weight;
4595   }
4596 }
4597
4598 static void spatial_denoise_point(uint8_t *src_ptr, const int stride,
4599                                   const int strength) {
4600   int sum_weight = 0;
4601   int sum_val = 0;
4602   int thresh = strength;
4603   int kernal_size = 5;
4604   int half_k_size = 2;
4605   int i, j;
4606   int max_diff = 0;
4607   uint8_t *tmp_ptr;
4608   uint8_t *kernal_ptr;
4609
4610   // Find the maximum deviation from the source point in the locale.
4611   tmp_ptr = src_ptr - (stride * (half_k_size + 1)) - (half_k_size + 1);
4612   for (i = 0; i < kernal_size + 2; ++i) {
4613     for (j = 0; j < kernal_size + 2; ++j) {
4614       max_diff = VPXMAX(max_diff, abs((int)*src_ptr - (int)tmp_ptr[j]));
4615     }
4616     tmp_ptr += stride;
4617   }
4618
4619   // Select the kernal size.
4620   if (max_diff > (strength + (strength >> 1))) {
4621     kernal_size = 3;
4622     half_k_size = 1;
4623     thresh = thresh >> 1;
4624   }
4625   kernal_ptr = (kernal_size == 3) ? dn_kernal_3 : dn_kernal_5;
4626
4627   // Apply the kernal
4628   tmp_ptr = src_ptr - (stride * half_k_size) - half_k_size;
4629   for (i = 0; i < kernal_size; ++i) {
4630     for (j = 0; j < kernal_size; ++j) {
4631       add_denoise_point((int)*src_ptr, (int)tmp_ptr[j], thresh, *kernal_ptr,
4632                         &sum_val, &sum_weight);
4633       ++kernal_ptr;
4634     }
4635     tmp_ptr += stride;
4636   }
4637
4638   // Update the source value with the new filtered value
4639   *src_ptr = (uint8_t)((sum_val + (sum_weight >> 1)) / sum_weight);
4640 }
4641
4642 #if CONFIG_VP9_HIGHBITDEPTH
4643 static void highbd_spatial_denoise_point(uint16_t *src_ptr, const int stride,
4644                                          const int strength) {
4645   int sum_weight = 0;
4646   int sum_val = 0;
4647   int thresh = strength;
4648   int kernal_size = 5;
4649   int half_k_size = 2;
4650   int i, j;
4651   int max_diff = 0;
4652   uint16_t *tmp_ptr;
4653   uint8_t *kernal_ptr;
4654
4655   // Find the maximum deviation from the source point in the locale.
4656   tmp_ptr = src_ptr - (stride * (half_k_size + 1)) - (half_k_size + 1);
4657   for (i = 0; i < kernal_size + 2; ++i) {
4658     for (j = 0; j < kernal_size + 2; ++j) {
4659       max_diff = VPXMAX(max_diff, abs((int)src_ptr - (int)tmp_ptr[j]));
4660     }
4661     tmp_ptr += stride;
4662   }
4663
4664   // Select the kernal size.
4665   if (max_diff > (strength + (strength >> 1))) {
4666     kernal_size = 3;
4667     half_k_size = 1;
4668     thresh = thresh >> 1;
4669   }
4670   kernal_ptr = (kernal_size == 3) ? dn_kernal_3 : dn_kernal_5;
4671
4672   // Apply the kernal
4673   tmp_ptr = src_ptr - (stride * half_k_size) - half_k_size;
4674   for (i = 0; i < kernal_size; ++i) {
4675     for (j = 0; j < kernal_size; ++j) {
4676       add_denoise_point((int)*src_ptr, (int)tmp_ptr[j], thresh, *kernal_ptr,
4677                         &sum_val, &sum_weight);
4678       ++kernal_ptr;
4679     }
4680     tmp_ptr += stride;
4681   }
4682
4683   // Update the source value with the new filtered value
4684   *src_ptr = (uint16_t)((sum_val + (sum_weight >> 1)) / sum_weight);
4685 }
4686 #endif  // CONFIG_VP9_HIGHBITDEPTH
4687
4688 // Apply thresholded spatial noise supression to a given buffer.
4689 static void spatial_denoise_buffer(VP9_COMP *cpi, uint8_t *buffer,
4690                                    const int stride, const int width,
4691                                    const int height, const int strength) {
4692   VP9_COMMON *const cm = &cpi->common;
4693   uint8_t *src_ptr = buffer;
4694   int row;
4695   int col;
4696
4697   for (row = 0; row < height; ++row) {
4698     for (col = 0; col < width; ++col) {
4699 #if CONFIG_VP9_HIGHBITDEPTH
4700       if (cm->use_highbitdepth)
4701         highbd_spatial_denoise_point(CONVERT_TO_SHORTPTR(&src_ptr[col]), stride,
4702                                      strength);
4703       else
4704         spatial_denoise_point(&src_ptr[col], stride, strength);
4705 #else
4706       spatial_denoise_point(&src_ptr[col], stride, strength);
4707 #endif  // CONFIG_VP9_HIGHBITDEPTH
4708     }
4709     src_ptr += stride;
4710   }
4711 }
4712
4713 // Apply thresholded spatial noise supression to source.
4714 static void spatial_denoise_frame(VP9_COMP *cpi) {
4715   YV12_BUFFER_CONFIG *src = cpi->Source;
4716   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
4717   TWO_PASS *const twopass = &cpi->twopass;
4718   VP9_COMMON *const cm = &cpi->common;
4719
4720   // Base the filter strength on the current active max Q.
4721   const int q = (int)(vp9_convert_qindex_to_q(twopass->active_worst_quality,
4722                                               cm->bit_depth));
4723   int strength =
4724       VPXMAX(oxcf->arnr_strength >> 2, VPXMIN(oxcf->arnr_strength, (q >> 4)));
4725
4726   // Denoise each of Y,U and V buffers.
4727   spatial_denoise_buffer(cpi, src->y_buffer, src->y_stride, src->y_width,
4728                          src->y_height, strength);
4729
4730   strength += (strength >> 1);
4731   spatial_denoise_buffer(cpi, src->u_buffer, src->uv_stride, src->uv_width,
4732                          src->uv_height, strength << 1);
4733
4734   spatial_denoise_buffer(cpi, src->v_buffer, src->uv_stride, src->uv_width,
4735                          src->uv_height, strength << 1);
4736 }
4737 #endif  // ENABLE_KF_DENOISE
4738
4739 #if !CONFIG_REALTIME_ONLY
4740 static void vp9_try_disable_lookahead_aq(VP9_COMP *cpi, size_t *size,
4741                                          uint8_t *dest) {
4742   if (cpi->common.seg.enabled)
4743     if (ALT_REF_AQ_PROTECT_GAIN) {
4744       size_t nsize = *size;
4745       int overhead;
4746
4747       // TODO(yuryg): optimize this, as
4748       // we don't really need to repack
4749
4750       save_coding_context(cpi);
4751       vp9_disable_segmentation(&cpi->common.seg);
4752       vp9_pack_bitstream(cpi, dest, &nsize);
4753       restore_coding_context(cpi);
4754
4755       overhead = (int)*size - (int)nsize;
4756
4757       if (vp9_alt_ref_aq_disable_if(cpi->alt_ref_aq, overhead, (int)*size))
4758         vp9_encode_frame(cpi);
4759       else
4760         vp9_enable_segmentation(&cpi->common.seg);
4761     }
4762 }
4763 #endif
4764
4765 static void set_frame_index(VP9_COMP *cpi, VP9_COMMON *cm) {
4766   RefCntBuffer *const ref_buffer = get_ref_cnt_buffer(cm, cm->new_fb_idx);
4767
4768   if (ref_buffer) {
4769     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
4770     ref_buffer->frame_index =
4771         cm->current_video_frame + gf_group->arf_src_offset[gf_group->index];
4772   }
4773 }
4774
4775 // Implementation and modifications of C. Yeo, H. L. Tan, and Y. H. Tan, "On
4776 // rate distortion optimization using SSIM," Circuits and Systems for Video
4777 // Technology, IEEE Transactions on, vol. 23, no. 7, pp. 1170-1181, 2013.
4778 // SSIM_VAR_SCALE defines the strength of the bias towards SSIM in RDO.
4779 // Some sample values are:
4780 // (for midres test set)
4781 // SSIM_VAR_SCALE  avg_psnr   ssim   ms_ssim
4782 //      8.0          9.421   -5.537  -6.898
4783 //     16.0          4.703   -5.378  -6.238
4784 //     32.0          1.929   -4.308  -4.807
4785 #define SSIM_VAR_SCALE 16.0
4786 static void set_mb_ssim_rdmult_scaling(VP9_COMP *cpi) {
4787   VP9_COMMON *cm = &cpi->common;
4788   ThreadData *td = &cpi->td;
4789   MACROBLOCK *x = &td->mb;
4790   MACROBLOCKD *xd = &x->e_mbd;
4791   uint8_t *y_buffer = cpi->Source->y_buffer;
4792   const int y_stride = cpi->Source->y_stride;
4793   const int block_size = BLOCK_16X16;
4794
4795   const int num_8x8_w = num_8x8_blocks_wide_lookup[block_size];
4796   const int num_8x8_h = num_8x8_blocks_high_lookup[block_size];
4797   const int num_cols = (cm->mi_cols + num_8x8_w - 1) / num_8x8_w;
4798   const int num_rows = (cm->mi_rows + num_8x8_h - 1) / num_8x8_h;
4799   double log_sum = 0.0;
4800   int row, col;
4801
4802   const double c2 = 58.5225 * SSIM_VAR_SCALE;  // 58.5225 = (.03*255)^2
4803
4804   // Loop through each 64x64 block.
4805   for (row = 0; row < num_rows; ++row) {
4806     for (col = 0; col < num_cols; ++col) {
4807       int mi_row, mi_col;
4808       double var = 0.0, num_of_var = 0.0;
4809       const int index = row * num_cols + col;
4810
4811       for (mi_row = row * num_8x8_h;
4812            mi_row < cm->mi_rows && mi_row < (row + 1) * num_8x8_h; ++mi_row) {
4813         for (mi_col = col * num_8x8_w;
4814              mi_col < cm->mi_cols && mi_col < (col + 1) * num_8x8_w; ++mi_col) {
4815           struct buf_2d buf;
4816           const int row_offset_y = mi_row << 3;
4817           const int col_offset_y = mi_col << 3;
4818
4819           buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y;
4820           buf.stride = y_stride;
4821
4822           // In order to make SSIM_VAR_SCALE in a same scale for both 8 bit
4823           // and high bit videos, the variance needs to be divided by 2.0 or
4824           // 64.0 separately.
4825           // TODO(sdeng): need to tune for 12bit videos.
4826 #if CONFIG_VP9_HIGHBITDEPTH
4827           if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH)
4828             var += vp9_high_get_sby_variance(cpi, &buf, BLOCK_8X8, xd->bd);
4829           else
4830 #endif
4831             var += vp9_get_sby_variance(cpi, &buf, BLOCK_8X8);
4832
4833           num_of_var += 1.0;
4834         }
4835       }
4836       var = var / num_of_var / 64.0;
4837       var = 2.0 * var + c2;
4838       cpi->mi_ssim_rdmult_scaling_factors[index] = var;
4839       log_sum += log(var);
4840     }
4841   }
4842   log_sum = exp(log_sum / (double)(num_rows * num_cols));
4843
4844   for (row = 0; row < num_rows; ++row) {
4845     for (col = 0; col < num_cols; ++col) {
4846       const int index = row * num_cols + col;
4847       cpi->mi_ssim_rdmult_scaling_factors[index] /= log_sum;
4848     }
4849   }
4850
4851   (void)xd;
4852 }
4853
4854 // Process the wiener variance in 16x16 block basis.
// qsort() comparator that orders transform coefficients ascending.
// Elements are read as tran_low_t because that is the element type, and the
// element size, that set_mb_wiener_variance() hands to qsort(). Reading them
// as int is only valid in configurations where tran_low_t is int-sized.
// Returns 1, -1 or 0 when *elem1 is greater than, less than or equal to
// *elem2 respectively.
static int qsort_comp(const void *elem1, const void *elem2) {
  const tran_low_t a = *((const tran_low_t *)elem1);
  const tran_low_t b = *((const tran_low_t *)elem2);
  if (a > b) return 1;
  if (a < b) return -1;
  return 0;
}
4862
4863 static void init_mb_wiener_var_buffer(VP9_COMP *cpi) {
4864   VP9_COMMON *cm = &cpi->common;
4865
4866   if (cpi->mb_wiener_variance && cpi->mb_wiener_var_rows >= cm->mb_rows &&
4867       cpi->mb_wiener_var_cols >= cm->mb_cols)
4868     return;
4869
4870   vpx_free(cpi->mb_wiener_variance);
4871   cpi->mb_wiener_variance = NULL;
4872
4873   CHECK_MEM_ERROR(
4874       cm, cpi->mb_wiener_variance,
4875       vpx_calloc(cm->mb_rows * cm->mb_cols, sizeof(*cpi->mb_wiener_variance)));
4876   cpi->mb_wiener_var_rows = cm->mb_rows;
4877   cpi->mb_wiener_var_cols = cm->mb_cols;
4878 }
4879
4880 static void set_mb_wiener_variance(VP9_COMP *cpi) {
4881   VP9_COMMON *cm = &cpi->common;
4882   uint8_t *buffer = cpi->Source->y_buffer;
4883   int buf_stride = cpi->Source->y_stride;
4884
4885 #if CONFIG_VP9_HIGHBITDEPTH
4886   ThreadData *td = &cpi->td;
4887   MACROBLOCK *x = &td->mb;
4888   MACROBLOCKD *xd = &x->e_mbd;
4889   DECLARE_ALIGNED(16, uint16_t, zero_pred16[32 * 32]);
4890   DECLARE_ALIGNED(16, uint8_t, zero_pred8[32 * 32]);
4891   uint8_t *zero_pred;
4892 #else
4893   DECLARE_ALIGNED(16, uint8_t, zero_pred[32 * 32]);
4894 #endif
4895
4896   DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
4897   DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
4898
4899   int mb_row, mb_col, count = 0;
4900   // Hard coded operating block size
4901   const int block_size = 16;
4902   const int coeff_count = block_size * block_size;
4903   const TX_SIZE tx_size = TX_16X16;
4904
4905 #if CONFIG_VP9_HIGHBITDEPTH
4906   xd->cur_buf = cpi->Source;
4907   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
4908     zero_pred = CONVERT_TO_BYTEPTR(zero_pred16);
4909     memset(zero_pred16, 0, sizeof(*zero_pred16) * coeff_count);
4910   } else {
4911     zero_pred = zero_pred8;
4912     memset(zero_pred8, 0, sizeof(*zero_pred8) * coeff_count);
4913   }
4914 #else
4915   memset(zero_pred, 0, sizeof(*zero_pred) * coeff_count);
4916 #endif
4917
4918   cpi->norm_wiener_variance = 0;
4919
4920   for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
4921     for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
4922       int idx;
4923       int16_t median_val = 0;
4924       uint8_t *mb_buffer =
4925           buffer + mb_row * block_size * buf_stride + mb_col * block_size;
4926       int64_t wiener_variance = 0;
4927
4928 #if CONFIG_VP9_HIGHBITDEPTH
4929       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
4930         vpx_highbd_subtract_block(block_size, block_size, src_diff, block_size,
4931                                   mb_buffer, buf_stride, zero_pred, block_size,
4932                                   xd->bd);
4933         highbd_wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
4934       } else {
4935         vpx_subtract_block(block_size, block_size, src_diff, block_size,
4936                            mb_buffer, buf_stride, zero_pred, block_size);
4937         wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
4938       }
4939 #else
4940       vpx_subtract_block(block_size, block_size, src_diff, block_size,
4941                          mb_buffer, buf_stride, zero_pred, block_size);
4942       wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
4943 #endif  // CONFIG_VP9_HIGHBITDEPTH
4944
4945       coeff[0] = 0;
4946       for (idx = 1; idx < coeff_count; ++idx) coeff[idx] = abs(coeff[idx]);
4947
4948       qsort(coeff, coeff_count - 1, sizeof(*coeff), qsort_comp);
4949
4950       // Noise level estimation
4951       median_val = coeff[coeff_count / 2];
4952
4953       // Wiener filter
4954       for (idx = 1; idx < coeff_count; ++idx) {
4955         int64_t sqr_coeff = (int64_t)coeff[idx] * coeff[idx];
4956         int64_t tmp_coeff = (int64_t)coeff[idx];
4957         if (median_val) {
4958           tmp_coeff = (sqr_coeff * coeff[idx]) /
4959                       (sqr_coeff + (int64_t)median_val * median_val);
4960         }
4961         wiener_variance += tmp_coeff * tmp_coeff;
4962       }
4963       cpi->mb_wiener_variance[mb_row * cm->mb_cols + mb_col] =
4964           wiener_variance / coeff_count;
4965       cpi->norm_wiener_variance +=
4966           cpi->mb_wiener_variance[mb_row * cm->mb_cols + mb_col];
4967       ++count;
4968     }
4969   }
4970
4971   if (count) cpi->norm_wiener_variance /= count;
4972   cpi->norm_wiener_variance = VPXMAX(1, cpi->norm_wiener_variance);
4973 }
4974
// Encodes the current frame and packs the resulting bitstream.
//   cpi:         encoder instance.
//   size:        receives the packed size; forced to at least 1 on the normal
//                exit path.
//   dest:        output buffer handed to the bitstream packer.
//   frame_flags: receives cpi->frame_flags with FRAMEFLAGS_KEY set or cleared
//                according to the coded frame type.
// After packing, the function updates reference buffers, adapts the
// probability contexts, runs the rate control post-encode update and resets
// the one-shot per-frame state.
// The function returns early, without reaching that bookkeeping, when
//   - an SVC enhancement layer is skipped because its target bandwidth is 0,
//   - encode_without_recode_loop() returns 0, or
//   - post_encode_drop_cbr() reports that the packed frame is dropped.
static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
                                      uint8_t *dest,
                                      unsigned int *frame_flags) {
  VP9_COMMON *const cm = &cpi->common;
  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
  struct segmentation *const seg = &cm->seg;
  TX_SIZE t;

  // SVC: skip encoding of enhancement layer if the layer target bandwidth = 0.
  // No need to set svc.skip_enhancement_layer if whole superframe will be
  // dropped.
  if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
      cpi->oxcf.target_bandwidth == 0 &&
      !(cpi->svc.framedrop_mode != LAYER_DROP &&
        (cpi->svc.framedrop_mode != CONSTRAINED_FROM_ABOVE_DROP ||
         cpi->svc
             .force_drop_constrained_from_above[cpi->svc.number_spatial_layers -
                                                1]) &&
        cpi->svc.drop_spatial_layer[0])) {
    // Record the layer as dropped and advance the layer frame counter.
    cpi->svc.skip_enhancement_layer = 1;
    vp9_rc_postencode_update_drop_frame(cpi);
    cpi->ext_refresh_frame_flags_pending = 0;
    cpi->last_frame_dropped = 1;
    cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1;
    cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1;
    vp9_inc_frame_in_layer(cpi);
    return;
  }

  set_ext_overrides(cpi);
  vpx_clear_system_state();

#ifdef ENABLE_KF_DENOISE
  // Spatial denoise of key frame.
  if (is_spatial_denoise_enabled(cpi)) spatial_denoise_frame(cpi);
#endif

  if (cm->show_existing_frame == 0) {
    // Update frame index
    set_frame_index(cpi, cm);

    // Set the arf sign bias for this frame.
    set_ref_sign_bias(cpi);
  }

  // Set default state for segment based loop filter update flags.
  cm->lf.mode_ref_delta_update = 0;

  // Second pass only: restrict the interpolation filter search.
  if (cpi->oxcf.pass == 2 && cpi->sf.adaptive_interp_filter_search)
    cpi->sf.interp_filter_search_mask = setup_interp_filter_search_mask(cpi);

  // Set various flags etc to special state if it is a key frame.
  if (frame_is_intra_only(cm)) {
    // Reset the loop filter deltas and segmentation map.
    vp9_reset_segment_features(&cm->seg);

    // If segmentation is enabled force a map update for key frames.
    if (seg->enabled) {
      seg->update_map = 1;
      seg->update_data = 1;
    }

    // The alternate reference frame cannot be active for a key frame.
    cpi->rc.source_alt_ref_active = 0;

    cm->error_resilient_mode = oxcf->error_resilient_mode;
    cm->frame_parallel_decoding_mode = oxcf->frame_parallel_decoding_mode;

    // By default, encoder assumes decoder can use prev_mi.
    if (cm->error_resilient_mode) {
      cm->frame_parallel_decoding_mode = 1;
      cm->reset_frame_context = 0;
      cm->refresh_frame_context = 0;
    } else if (cm->intra_only) {
      // Only reset the current context.
      cm->reset_frame_context = 2;
    }
  }

  // Per-frame perceptual tuning tables, built from the source frame.
  if (oxcf->tuning == VP8_TUNE_SSIM) set_mb_ssim_rdmult_scaling(cpi);

  if (oxcf->aq_mode == PERCEPTUAL_AQ) {
    init_mb_wiener_var_buffer(cpi);
    set_mb_wiener_variance(cpi);
  }

  vpx_clear_system_state();

#if CONFIG_INTERNAL_STATS
  memset(cpi->mode_chosen_counts, 0,
         MAX_MODES * sizeof(*cpi->mode_chosen_counts));
#endif
#if CONFIG_CONSISTENT_RECODE
  // Backup to ensure consistency between recodes
  save_encode_params(cpi);
#endif

  // Encode the frame, with or without the recode loop. In realtime-only
  // builds the recode path is compiled out.
  if (cpi->sf.recode_loop == DISALLOW_RECODE) {
    if (!encode_without_recode_loop(cpi, size, dest)) return;
  } else {
#if !CONFIG_REALTIME_ONLY
    encode_with_recode_loop(cpi, size, dest);
#endif
  }

  // TODO(jingning): When using show existing frame mode, we assume that the
  // current ARF will be directly used as the final reconstructed frame. This is
  // an encoder control scheme. One could in principle explore other
  // possibilities to arrange the reference frame buffer and their coding order.
  if (cm->show_existing_frame) {
    ref_cnt_fb(cm->buffer_pool->frame_bufs, &cm->new_fb_idx,
               cm->ref_frame_map[cpi->alt_fb_idx]);
  }

#if !CONFIG_REALTIME_ONLY
  // Disable segmentation if it decrease rate/distortion ratio
  if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
    vp9_try_disable_lookahead_aq(cpi, size, dest);
#endif

  // Optional debug dumps of the denoised frame and the skin map.
#if CONFIG_VP9_TEMPORAL_DENOISING
#ifdef OUTPUT_YUV_DENOISED
  if (oxcf->noise_sensitivity > 0 && denoise_svc(cpi)) {
    vpx_write_yuv_frame(yuv_denoised_file,
                        &cpi->denoiser.running_avg_y[INTRA_FRAME]);
  }
#endif
#endif
#ifdef OUTPUT_YUV_SKINMAP
  if (cpi->common.current_video_frame > 1) {
    vp9_output_skin_map(cpi, yuv_skinmap_file);
  }
#endif

  // Special case code to reduce pulsing when key frames are forced at a
  // fixed interval. Note the reconstruction error if it is the frame before
  // the force key frame
  if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) {
#if CONFIG_VP9_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      cpi->ambient_err =
          vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
    } else {
      cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
    }
#else
    cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  // If the encoder forced a KEY_FRAME decision
  if (cm->frame_type == KEY_FRAME) cpi->refresh_last_frame = 1;

  // Publish the new buffer as the frame to show and copy the colour and
  // render-size metadata onto it.
  cm->frame_to_show = get_frame_new_buffer(cm);
  cm->frame_to_show->color_space = cm->color_space;
  cm->frame_to_show->color_range = cm->color_range;
  cm->frame_to_show->render_width = cm->render_width;
  cm->frame_to_show->render_height = cm->render_height;

  // Pick the loop filter level for the frame.
  loopfilter_frame(cpi, cm);

  // Saved here so the context can be restored if the frame is dropped after
  // packing (see below).
  if (cpi->rc.use_post_encode_drop) save_coding_context(cpi);

  // build the bitstream
  vp9_pack_bitstream(cpi, dest, size);

  // Post-encode drop: only considered for spatial layer 0 and below the worst
  // quality; on a drop the saved coding context is restored.
  if (cpi->rc.use_post_encode_drop && cm->base_qindex < cpi->rc.worst_quality &&
      cpi->svc.spatial_layer_id == 0 && post_encode_drop_cbr(cpi, size)) {
    restore_coding_context(cpi);
    return;
  }

  cpi->last_frame_dropped = 0;
  cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 0;
  if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)
    cpi->svc.num_encoded_top_layer++;

  // Keep track of the frame buffer index updated/refreshed for the
  // current encoded TL0 superframe.
  if (cpi->svc.temporal_layer_id == 0) {
    if (cpi->refresh_last_frame)
      cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->lst_fb_idx;
    else if (cpi->refresh_golden_frame)
      cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->gld_fb_idx;
    else if (cpi->refresh_alt_ref_frame)
      cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->alt_fb_idx;
  }

  if (cm->seg.update_map) update_reference_segmentation_map(cpi);

  if (frame_is_intra_only(cm) == 0) {
    release_scaled_references(cpi);
  }
  vp9_update_reference_frames(cpi);

  // Backward adaptation of the probability contexts; skipped entirely for
  // show-existing frames, and the adaptation step is skipped in error
  // resilient or frame parallel decoding mode.
  if (!cm->show_existing_frame) {
    for (t = TX_4X4; t <= TX_32X32; ++t) {
      full_to_model_counts(cpi->td.counts->coef[t],
                           cpi->td.rd_counts.coef_counts[t]);
    }

    if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
      if (!frame_is_intra_only(cm)) {
        vp9_adapt_mode_probs(cm);
        vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
      }
      vp9_adapt_coef_probs(cm);
    }
  }

  cpi->ext_refresh_frame_flags_pending = 0;

  // Mirror the golden/altref refresh decisions into the frame flags.
  if (cpi->refresh_golden_frame == 1)
    cpi->frame_flags |= FRAMEFLAGS_GOLDEN;
  else
    cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN;

  if (cpi->refresh_alt_ref_frame == 1)
    cpi->frame_flags |= FRAMEFLAGS_ALTREF;
  else
    cpi->frame_flags &= ~FRAMEFLAGS_ALTREF;

  cpi->ref_frame_flags = get_ref_frame_flags(cpi);

  cm->last_frame_type = cm->frame_type;

  vp9_rc_postencode_update(cpi, *size);

  // Never report an empty frame to the caller.
  *size = VPXMAX(1, *size);

#if 0
  output_frame_level_debug_stats(cpi);
#endif

  if (cm->frame_type == KEY_FRAME) {
    // Tell the caller that the frame was coded as a key frame
    *frame_flags = cpi->frame_flags | FRAMEFLAGS_KEY;
  } else {
    *frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY;
  }

  // Clear the one shot update flags for segmentation map and mode/ref loop
  // filter deltas.
  cm->seg.update_map = 0;
  cm->seg.update_data = 0;
  cm->lf.mode_ref_delta_update = 0;

  // keep track of the last coded dimensions
  cm->last_width = cm->width;
  cm->last_height = cm->height;

  // reset to normal state now that we are done.
  if (!cm->show_existing_frame) {
    cm->last_show_frame = cm->show_frame;
    cm->prev_frame = cm->cur_frame;
  }

  if (cm->show_frame) {
    vp9_swap_mi_and_prev_mi(cm);
    // Don't increment frame counters if this was an altref buffer
    // update not a real frame
    ++cm->current_video_frame;
    if (cpi->use_svc) vp9_inc_frame_in_layer(cpi);
  }

  if (cpi->use_svc) {
    // Remember the frame type in the context of the current
    // (spatial, temporal) layer.
    cpi->svc
        .layer_context[cpi->svc.spatial_layer_id *
                           cpi->svc.number_temporal_layers +
                       cpi->svc.temporal_layer_id]
        .last_frame_type = cm->frame_type;
    // Reset layer_sync back to 0 for next frame.
    cpi->svc.spatial_layer_sync[cpi->svc.spatial_layer_id] = 0;
  }

  cpi->force_update_segmentation = 0;

#if !CONFIG_REALTIME_ONLY
  if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
    vp9_alt_ref_aq_unset_all(cpi->alt_ref_aq, cpi);
#endif

  cpi->svc.previous_frame_is_intra_only = cm->intra_only;
  cpi->svc.set_intra_only_frame = 0;
}
5261
// SVC encode entry point: fetches the rate control parameters for the current
// layer via vp9_rc_get_svc_params(), then encodes the frame. size, dest and
// frame_flags are forwarded unchanged to encode_frame_to_data_rate().
static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
                      unsigned int *frame_flags) {
  vp9_rc_get_svc_params(cpi);
  encode_frame_to_data_rate(cpi, size, dest, frame_flags);
}
5267
5268 static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
5269                         unsigned int *frame_flags) {
5270   if (cpi->oxcf.rc_mode == VPX_CBR) {
5271     vp9_rc_get_one_pass_cbr_params(cpi);
5272   } else {
5273     vp9_rc_get_one_pass_vbr_params(cpi);
5274   }
5275   encode_frame_to_data_rate(cpi, size, dest, frame_flags);
5276 }
5277
5278 #if !CONFIG_REALTIME_ONLY
// Second-pass encode entry point: enables encode breakout, encodes the frame
// and then runs the two-pass post-encode update. size, dest and frame_flags
// are forwarded unchanged to encode_frame_to_data_rate().
static void Pass2Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
                        unsigned int *frame_flags) {
  cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
#if CONFIG_MISMATCH_DEBUG
  // Mismatch debugging only: advance the writer-side frame index.
  mismatch_move_frame_idx_w();
#endif
  encode_frame_to_data_rate(cpi, size, dest, frame_flags);

  vp9_twopass_postencode_update(cpi);
}
5289 #endif  // !CONFIG_REALTIME_ONLY
5290
5291 static void init_ref_frame_bufs(VP9_COMMON *cm) {
5292   int i;
5293   BufferPool *const pool = cm->buffer_pool;
5294   cm->new_fb_idx = INVALID_IDX;
5295   for (i = 0; i < REF_FRAMES; ++i) {
5296     cm->ref_frame_map[i] = INVALID_IDX;
5297   }
5298   for (i = 0; i < FRAME_BUFFERS; ++i) {
5299     pool->frame_bufs[i].ref_count = 0;
5300   }
5301 }
5302
5303 static void check_initial_width(VP9_COMP *cpi,
5304 #if CONFIG_VP9_HIGHBITDEPTH
5305                                 int use_highbitdepth,
5306 #endif
5307                                 int subsampling_x, int subsampling_y) {
5308   VP9_COMMON *const cm = &cpi->common;
5309
5310   if (!cpi->initial_width ||
5311 #if CONFIG_VP9_HIGHBITDEPTH
5312       cm->use_highbitdepth != use_highbitdepth ||
5313 #endif
5314       cm->subsampling_x != subsampling_x ||
5315       cm->subsampling_y != subsampling_y) {
5316     cm->subsampling_x = subsampling_x;
5317     cm->subsampling_y = subsampling_y;
5318 #if CONFIG_VP9_HIGHBITDEPTH
5319     cm->use_highbitdepth = use_highbitdepth;
5320 #endif
5321
5322     alloc_raw_frame_buffers(cpi);
5323     init_ref_frame_bufs(cm);
5324     alloc_util_frame_buffers(cpi);
5325
5326     init_motion_estimation(cpi);  // TODO(agrange) This can be removed.
5327
5328     cpi->initial_width = cm->width;
5329     cpi->initial_height = cm->height;
5330     cpi->initial_mbs = cm->MBs;
5331   }
5332 }
5333
5334 int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags,
5335                           YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
5336                           int64_t end_time) {
5337   VP9_COMMON *const cm = &cpi->common;
5338   struct vpx_usec_timer timer;
5339   int res = 0;
5340   const int subsampling_x = sd->subsampling_x;
5341   const int subsampling_y = sd->subsampling_y;
5342 #if CONFIG_VP9_HIGHBITDEPTH
5343   const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
5344 #endif
5345
5346 #if CONFIG_VP9_HIGHBITDEPTH
5347   check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
5348 #else
5349   check_initial_width(cpi, subsampling_x, subsampling_y);
5350 #endif  // CONFIG_VP9_HIGHBITDEPTH
5351
5352 #if CONFIG_VP9_HIGHBITDEPTH
5353   // Disable denoiser for high bitdepth since vp9_denoiser_filter only works for
5354   // 8 bits.
5355   if (cm->bit_depth > 8) cpi->oxcf.noise_sensitivity = 0;
5356 #endif
5357
5358 #if CONFIG_VP9_TEMPORAL_DENOISING
5359   setup_denoiser_buffer(cpi);
5360 #endif
5361   vpx_usec_timer_start(&timer);
5362
5363   if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
5364 #if CONFIG_VP9_HIGHBITDEPTH
5365                          use_highbitdepth,
5366 #endif  // CONFIG_VP9_HIGHBITDEPTH
5367                          frame_flags))
5368     res = -1;
5369   vpx_usec_timer_mark(&timer);
5370   cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
5371
5372   if ((cm->profile == PROFILE_0 || cm->profile == PROFILE_2) &&
5373       (subsampling_x != 1 || subsampling_y != 1)) {
5374     vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
5375                        "Non-4:2:0 color format requires profile 1 or 3");
5376     res = -1;
5377   }
5378   if ((cm->profile == PROFILE_1 || cm->profile == PROFILE_3) &&
5379       (subsampling_x == 1 && subsampling_y == 1)) {
5380     vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
5381                        "4:2:0 color format requires profile 0 or 2");
5382     res = -1;
5383   }
5384
5385   return res;
5386 }
5387
5388 static int frame_is_reference(const VP9_COMP *cpi) {
5389   const VP9_COMMON *cm = &cpi->common;
5390
5391   return cm->frame_type == KEY_FRAME || cpi->refresh_last_frame ||
5392          cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame ||
5393          cm->refresh_frame_context || cm->lf.mode_ref_delta_update ||
5394          cm->seg.update_map || cm->seg.update_data;
5395 }
5396
5397 static void adjust_frame_rate(VP9_COMP *cpi,
5398                               const struct lookahead_entry *source) {
5399   int64_t this_duration;
5400   int step = 0;
5401
5402   if (source->ts_start == cpi->first_time_stamp_ever) {
5403     this_duration = source->ts_end - source->ts_start;
5404     step = 1;
5405   } else {
5406     int64_t last_duration =
5407         cpi->last_end_time_stamp_seen - cpi->last_time_stamp_seen;
5408
5409     this_duration = source->ts_end - cpi->last_end_time_stamp_seen;
5410
5411     // do a step update if the duration changes by 10%
5412     if (last_duration)
5413       step = (int)((this_duration - last_duration) * 10 / last_duration);
5414   }
5415
5416   if (this_duration) {
5417     if (step) {
5418       vp9_new_framerate(cpi, 10000000.0 / this_duration);
5419     } else {
5420       // Average this frame's rate into the last second's average
5421       // frame rate. If we haven't seen 1 second yet, then average
5422       // over the whole interval seen.
5423       const double interval = VPXMIN(
5424           (double)(source->ts_end - cpi->first_time_stamp_ever), 10000000.0);
5425       double avg_duration = 10000000.0 / cpi->framerate;
5426       avg_duration *= (interval - avg_duration + this_duration);
5427       avg_duration /= interval;
5428
5429       vp9_new_framerate(cpi, 10000000.0 / avg_duration);
5430     }
5431   }
5432   cpi->last_time_stamp_seen = source->ts_start;
5433   cpi->last_end_time_stamp_seen = source->ts_end;
5434 }
5435
5436 // Returns 0 if this is not an alt ref else the offset of the source frame
5437 // used as the arf midpoint.
5438 static int get_arf_src_index(VP9_COMP *cpi) {
5439   RATE_CONTROL *const rc = &cpi->rc;
5440   int arf_src_index = 0;
5441   if (is_altref_enabled(cpi)) {
5442     if (cpi->oxcf.pass == 2) {
5443       const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
5444       if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
5445         arf_src_index = gf_group->arf_src_offset[gf_group->index];
5446       }
5447     } else if (rc->source_alt_ref_pending) {
5448       arf_src_index = rc->frames_till_gf_update_due;
5449     }
5450   }
5451   return arf_src_index;
5452 }
5453
5454 static void check_src_altref(VP9_COMP *cpi,
5455                              const struct lookahead_entry *source) {
5456   RATE_CONTROL *const rc = &cpi->rc;
5457
5458   if (cpi->oxcf.pass == 2) {
5459     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
5460     rc->is_src_frame_alt_ref =
5461         (gf_group->update_type[gf_group->index] == OVERLAY_UPDATE);
5462   } else {
5463     rc->is_src_frame_alt_ref =
5464         cpi->alt_ref_source && (source == cpi->alt_ref_source);
5465   }
5466
5467   if (rc->is_src_frame_alt_ref) {
5468     // Current frame is an ARF overlay frame.
5469     cpi->alt_ref_source = NULL;
5470
5471     // Don't refresh the last buffer for an ARF overlay frame. It will
5472     // become the GF so preserve last as an alternative prediction option.
5473     cpi->refresh_last_frame = 0;
5474   }
5475 }
5476
5477 #if CONFIG_INTERNAL_STATS
5478 static void adjust_image_stat(double y, double u, double v, double all,
5479                               ImageStat *s) {
5480   s->stat[Y] += y;
5481   s->stat[U] += u;
5482   s->stat[V] += v;
5483   s->stat[ALL] += all;
5484   s->worst = VPXMIN(s->worst, all);
5485 }
5486 #endif  // CONFIG_INTERNAL_STATS
5487
5488 // Adjust the maximum allowable frame size for the target level.
5489 static void level_rc_framerate(VP9_COMP *cpi, int arf_src_index) {
5490   RATE_CONTROL *const rc = &cpi->rc;
5491   LevelConstraint *const ls = &cpi->level_constraint;
5492   VP9_COMMON *const cm = &cpi->common;
5493   const double max_cpb_size = ls->max_cpb_size;
5494   vpx_clear_system_state();
5495   rc->max_frame_bandwidth = VPXMIN(rc->max_frame_bandwidth, ls->max_frame_size);
5496   if (frame_is_intra_only(cm)) {
5497     rc->max_frame_bandwidth =
5498         VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.5));
5499   } else if (arf_src_index > 0) {
5500     rc->max_frame_bandwidth =
5501         VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.4));
5502   } else {
5503     rc->max_frame_bandwidth =
5504         VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.2));
5505   }
5506 }
5507
// Folds the frame that was just encoded into the running level statistics
// (cpi->level_info.level_stats) and the derived level spec
// (cpi->level_info.level_spec). When a target level is configured
// (level_index >= 0) and no failure has been flagged yet, the updated spec is
// compared against vp9_level_defs[level_index]; every violated limit sets a
// bit in level_constraint->fail_flag and is reported via vpx_internal_error().
// Finally an upper bound for the next frame's size is stored in
// level_constraint->max_frame_size.
//
//   cpi           - encoder instance; level_info and level_constraint are
//                   updated in place.
//   size          - points to the compressed size of the current frame; only
//                   read here.
//                   NOTE(review): the /125.0 scaling below is consistent with
//                   bytes -> kilobits; confirm the unit against the caller.
//   arf_src_index - a value > 0 marks this frame as an alt ref for the
//                   alt-ref distance bookkeeping.
static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
  VP9_COMMON *const cm = &cpi->common;
  Vp9LevelInfo *const level_info = &cpi->level_info;
  Vp9LevelSpec *const level_spec = &level_info->level_spec;
  Vp9LevelStats *const level_stats = &level_info->level_stats;
  int i, idx;
  // dur_end is assigned on the first iteration (i == 0) of the luma sample
  // rate loop below before it is ever read.
  uint64_t luma_samples, dur_end;
  const uint32_t luma_pic_size = cm->width * cm->height;
  const uint32_t luma_pic_breadth = VPXMAX(cm->width, cm->height);
  LevelConstraint *const level_constraint = &cpi->level_constraint;
  const int8_t level_index = level_constraint->level_index;
  double cpb_data_size;

  vpx_clear_system_state();

  // update level_stats
  level_stats->total_compressed_size += *size;
  if (cm->show_frame) {
    // Uncompressed size counts the luma plane plus two chroma planes scaled
    // down by the subsampling shifts.
    level_stats->total_uncompressed_size +=
        luma_pic_size +
        2 * (luma_pic_size >> (cm->subsampling_x + cm->subsampling_y));
    // Time span between the first timestamp ever seen and the end of the most
    // recent frame, converted from ticks by TICKS_PER_SEC.
    level_stats->time_encoded =
        (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) /
        (double)TICKS_PER_SEC;
  }

  // Track the smallest distance (in frames) between two consecutive alt refs.
  // The first alt ref only arms the tracking; it has no predecessor.
  if (arf_src_index > 0) {
    if (!level_stats->seen_first_altref) {
      level_stats->seen_first_altref = 1;
    } else if (level_stats->frames_since_last_altref <
               level_spec->min_altref_distance) {
      level_spec->min_altref_distance = level_stats->frames_since_last_altref;
    }
    level_stats->frames_since_last_altref = 0;
  } else {
    ++level_stats->frames_since_last_altref;
  }

  // Record this frame in the circular frame window. While the window holds
  // fewer than FRAME_WINDOW_SIZE - 1 entries a new slot is appended; after
  // that the oldest slot is overwritten and start advances by one.
  if (level_stats->frame_window_buffer.len < FRAME_WINDOW_SIZE - 1) {
    idx = (level_stats->frame_window_buffer.start +
           level_stats->frame_window_buffer.len++) %
          FRAME_WINDOW_SIZE;
  } else {
    idx = level_stats->frame_window_buffer.start;
    level_stats->frame_window_buffer.start = (idx + 1) % FRAME_WINDOW_SIZE;
  }
  level_stats->frame_window_buffer.buf[idx].ts = cpi->last_time_stamp_seen;
  level_stats->frame_window_buffer.buf[idx].size = (uint32_t)(*size);
  level_stats->frame_window_buffer.buf[idx].luma_samples = luma_pic_size;

  // Maintain a bitmap of reference buffers touched since the last key frame
  // and use its population count as the number of reference buffers in use.
  if (cm->frame_type == KEY_FRAME) {
    level_stats->ref_refresh_map = 0;
  } else {
    int count = 0;
    level_stats->ref_refresh_map |= vp9_get_refresh_mask(cpi);
    // Also need to consider the case where the encoder refers to a buffer
    // that has been implicitly refreshed after encoding a keyframe.
    if (!cm->intra_only) {
      level_stats->ref_refresh_map |= (1 << cpi->lst_fb_idx);
      level_stats->ref_refresh_map |= (1 << cpi->gld_fb_idx);
      level_stats->ref_refresh_map |= (1 << cpi->alt_fb_idx);
    }
    for (i = 0; i < REF_FRAMES; ++i) {
      count += (level_stats->ref_refresh_map >> i) & 1;
    }
    if (count > level_spec->max_ref_frame_buffers) {
      level_spec->max_ref_frame_buffers = count;
    }
  }

  // update average_bitrate
  level_spec->average_bitrate = (double)level_stats->total_compressed_size /
                                125.0 / level_stats->time_encoded;

  // update max_luma_sample_rate
  // Walk the window from the newest entry backwards, summing luma samples of
  // all entries whose timestamp lies within TICKS_PER_SEC of the newest one.
  luma_samples = 0;
  for (i = 0; i < level_stats->frame_window_buffer.len; ++i) {
    idx = (level_stats->frame_window_buffer.start +
           level_stats->frame_window_buffer.len - 1 - i) %
          FRAME_WINDOW_SIZE;
    if (i == 0) {
      dur_end = level_stats->frame_window_buffer.buf[idx].ts;
    }
    if (dur_end - level_stats->frame_window_buffer.buf[idx].ts >=
        TICKS_PER_SEC) {
      break;
    }
    luma_samples += level_stats->frame_window_buffer.buf[idx].luma_samples;
  }
  if (luma_samples > level_spec->max_luma_sample_rate) {
    level_spec->max_luma_sample_rate = luma_samples;
  }

  // update max_cpb_size
  // Sum the sizes of the most recent (at most CPB_WINDOW_SIZE) frames.
  cpb_data_size = 0;
  for (i = 0; i < CPB_WINDOW_SIZE; ++i) {
    if (i >= level_stats->frame_window_buffer.len) break;
    idx = (level_stats->frame_window_buffer.start +
           level_stats->frame_window_buffer.len - 1 - i) %
          FRAME_WINDOW_SIZE;
    cpb_data_size += level_stats->frame_window_buffer.buf[idx].size;
  }
  cpb_data_size = cpb_data_size / 125.0;
  if (cpb_data_size > level_spec->max_cpb_size) {
    level_spec->max_cpb_size = cpb_data_size;
  }

  // update max_luma_picture_size
  if (luma_pic_size > level_spec->max_luma_picture_size) {
    level_spec->max_luma_picture_size = luma_pic_size;
  }

  // update max_luma_picture_breadth
  if (luma_pic_breadth > level_spec->max_luma_picture_breadth) {
    level_spec->max_luma_picture_breadth = luma_pic_breadth;
  }

  // update compression_ratio
  level_spec->compression_ratio = (double)level_stats->total_uncompressed_size *
                                  cm->bit_depth /
                                  level_stats->total_compressed_size / 8.0;

  // update max_col_tiles
  if (level_spec->max_col_tiles < (1 << cm->log2_tile_cols)) {
    level_spec->max_col_tiles = (1 << cm->log2_tile_cols);
  }

  // Enforce the target level. This is skipped once any failure bit has been
  // set, so violations are only reported from a clean state.
  if (level_index >= 0 && level_constraint->fail_flag == 0) {
    if (level_spec->max_luma_picture_size >
        vp9_level_defs[level_index].max_luma_picture_size) {
      level_constraint->fail_flag |= (1 << LUMA_PIC_SIZE_TOO_LARGE);
      vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                         "Failed to encode to the target level %d. %s",
                         vp9_level_defs[level_index].level,
                         level_fail_messages[LUMA_PIC_SIZE_TOO_LARGE]);
    }

    if (level_spec->max_luma_picture_breadth >
        vp9_level_defs[level_index].max_luma_picture_breadth) {
      level_constraint->fail_flag |= (1 << LUMA_PIC_BREADTH_TOO_LARGE);
      vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                         "Failed to encode to the target level %d. %s",
                         vp9_level_defs[level_index].level,
                         level_fail_messages[LUMA_PIC_BREADTH_TOO_LARGE]);
    }

    // The sample rate limit is relaxed by the SAMPLE_RATE_GRACE_P fraction.
    if ((double)level_spec->max_luma_sample_rate >
        (double)vp9_level_defs[level_index].max_luma_sample_rate *
            (1 + SAMPLE_RATE_GRACE_P)) {
      level_constraint->fail_flag |= (1 << LUMA_SAMPLE_RATE_TOO_LARGE);
      vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                         "Failed to encode to the target level %d. %s",
                         vp9_level_defs[level_index].level,
                         level_fail_messages[LUMA_SAMPLE_RATE_TOO_LARGE]);
    }

    if (level_spec->max_col_tiles > vp9_level_defs[level_index].max_col_tiles) {
      level_constraint->fail_flag |= (1 << TOO_MANY_COLUMN_TILE);
      vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                         "Failed to encode to the target level %d. %s",
                         vp9_level_defs[level_index].level,
                         level_fail_messages[TOO_MANY_COLUMN_TILE]);
    }

    if (level_spec->min_altref_distance <
        vp9_level_defs[level_index].min_altref_distance) {
      level_constraint->fail_flag |= (1 << ALTREF_DIST_TOO_SMALL);
      vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                         "Failed to encode to the target level %d. %s",
                         vp9_level_defs[level_index].level,
                         level_fail_messages[ALTREF_DIST_TOO_SMALL]);
    }

    if (level_spec->max_ref_frame_buffers >
        vp9_level_defs[level_index].max_ref_frame_buffers) {
      level_constraint->fail_flag |= (1 << TOO_MANY_REF_BUFFER);
      vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                         "Failed to encode to the target level %d. %s",
                         vp9_level_defs[level_index].level,
                         level_fail_messages[TOO_MANY_REF_BUFFER]);
    }

    if (level_spec->max_cpb_size > vp9_level_defs[level_index].max_cpb_size) {
      level_constraint->fail_flag |= (1 << CPB_TOO_LARGE);
      vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                         "Failed to encode to the target level %d. %s",
                         vp9_level_defs[level_index].level,
                         level_fail_messages[CPB_TOO_LARGE]);
    }

    // Set an upper bound for the next frame size. It will be used in
    // level_rc_framerate() before encoding the next frame.
    // The bound is the level's CPB limit minus the data of the most recent
    // CPB_WINDOW_SIZE - 1 frames, i.e. the room left for one more frame.
    cpb_data_size = 0;
    for (i = 0; i < CPB_WINDOW_SIZE - 1; ++i) {
      if (i >= level_stats->frame_window_buffer.len) break;
      idx = (level_stats->frame_window_buffer.start +
             level_stats->frame_window_buffer.len - 1 - i) %
            FRAME_WINDOW_SIZE;
      cpb_data_size += level_stats->frame_window_buffer.buf[idx].size;
    }
    cpb_data_size = cpb_data_size / 125.0;
    level_constraint->max_frame_size =
        (int)((vp9_level_defs[level_index].max_cpb_size - cpb_data_size) *
              1000.0);
    // While the window holds fewer than CPB_WINDOW_SIZE - 1 frames the bound
    // is halved.
    if (level_stats->frame_window_buffer.len < CPB_WINDOW_SIZE - 1)
      level_constraint->max_frame_size >>= 1;
  }
}
5716
// One entry of the picture list built by init_gop_frames().
typedef struct GF_PICTURE {
  // Image buffer for this picture.
  YV12_BUFFER_CONFIG *frame;
  // Indices into the same GF_PICTURE array of the pictures used as references;
  // init_gop_frames() fills them as [0] = golden, [1] = last, [2] = alt ref,
  // with -1 meaning "no such reference".
  int ref_frame[3];
  // Update type assigned to this picture.
  FRAME_UPDATE_TYPE update_type;
} GF_PICTURE;
5722
// Builds the picture list gf_picture[] for the current golden-frame group and
// stores the number of entries filled in *tpl_group_frames.
//
// Layout of the list as produced below:
//   [0]   the current golden reference buffer, with no references,
//   [1]   cpi->Source, referencing entry 0 as golden,
//   [2..] frames peeked from the lookahead according to
//         gf_group->frame_gop_index[], with update types from gf_group,
//   then up to two further lookahead frames beyond the group, marked
//   LF_UPDATE.
//
// As a side effect, currently unreferenced frame buffers in the buffer pool
// are (re)allocated and the first REFS_PER_FRAME + 1 of them are published in
// cpi->tpl_recon_frames[].
static void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
                            const GF_GROUP *gf_group, int *tpl_group_frames) {
  VP9_COMMON *cm = &cpi->common;
  int frame_idx = 0;
  int i;
  // Running indices (into gf_picture[]) of the pictures currently acting as
  // golden / alt ref / last reference; -1 means none yet.
  int gld_index = -1;
  int alt_index = -1;
  int lst_index = -1;
  // Stack of alt-ref indices that are shadowed while a newer ARF is active.
  int arf_index_stack[MAX_ARF_LAYERS];
  int arf_stack_size = 0;
  int extend_frame_count = 0;
  // Base q index applied to the frames appended beyond the group.
  int pframe_qindex = cpi->tpl_stats[2].base_qindex;
  int frame_gop_offset = 0;

  RefCntBuffer *frame_bufs = cm->buffer_pool->frame_bufs;
  int8_t recon_frame_index[REFS_PER_FRAME + MAX_ARF_LAYERS];

  memset(recon_frame_index, -1, sizeof(recon_frame_index));
  stack_init(arf_index_stack, MAX_ARF_LAYERS);

  // TODO(jingning): To be used later for gf frame type parsing.
  // NOTE(review): gf_group is in fact read further down, so this cast looks
  // stale.
  (void)gf_group;

  // Claim frame buffers that nobody references (ref_count == 0), size them
  // for the current frame, and remember their pool indices.
  for (i = 0; i < FRAME_BUFFERS; ++i) {
    if (frame_bufs[i].ref_count == 0) {
      alloc_frame_mvs(cm, i);
      if (vpx_realloc_frame_buffer(&frame_bufs[i].buf, cm->width, cm->height,
                                   cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                                   cm->use_highbitdepth,
#endif
                                   VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
                                   NULL, NULL, NULL))
        vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                           "Failed to allocate frame buffer");

      recon_frame_index[frame_idx] = i;
      ++frame_idx;

      if (frame_idx >= REFS_PER_FRAME + cpi->oxcf.enable_auto_arf) break;
    }
  }

  // Publish the claimed buffers; the assert documents the expectation that at
  // least REFS_PER_FRAME + 1 free buffers were found above.
  for (i = 0; i < REFS_PER_FRAME + 1; ++i) {
    assert(recon_frame_index[i] >= 0);
    cpi->tpl_recon_frames[i] = &frame_bufs[recon_frame_index[i]].buf;
  }

  *tpl_group_frames = 0;

  // Initialize Golden reference frame.
  gf_picture[0].frame = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
  for (i = 0; i < 3; ++i) gf_picture[0].ref_frame[i] = -1;
  gf_picture[0].update_type = gf_group->update_type[0];
  gld_index = 0;
  ++*tpl_group_frames;

  // Initialize base layer ARF frame
  gf_picture[1].frame = cpi->Source;
  gf_picture[1].ref_frame[0] = gld_index;
  gf_picture[1].ref_frame[1] = lst_index;
  gf_picture[1].ref_frame[2] = alt_index;
  gf_picture[1].update_type = gf_group->update_type[1];
  alt_index = 1;
  ++*tpl_group_frames;

  // Initialize P frames
  for (frame_idx = 2; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
    struct lookahead_entry *buf;
    frame_gop_offset = gf_group->frame_gop_index[frame_idx];
    buf = vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);

    // Stop as soon as the lookahead has no frame at the requested position.
    if (buf == NULL) break;

    gf_picture[frame_idx].frame = &buf->img;
    gf_picture[frame_idx].ref_frame[0] = gld_index;
    gf_picture[frame_idx].ref_frame[1] = lst_index;
    gf_picture[frame_idx].ref_frame[2] = alt_index;
    gf_picture[frame_idx].update_type = gf_group->update_type[frame_idx];

    // Update the running reference indices so that the following pictures
    // see this one in the role implied by its update type.
    switch (gf_group->update_type[frame_idx]) {
      case ARF_UPDATE:
        // A new ARF shadows the current one; remember the old index.
        stack_push(arf_index_stack, alt_index, arf_stack_size);
        ++arf_stack_size;
        alt_index = frame_idx;
        break;
      case LF_UPDATE: lst_index = frame_idx; break;
      case OVERLAY_UPDATE:
        // The overlay becomes golden and the previous ARF is restored.
        gld_index = frame_idx;
        alt_index = stack_pop(arf_index_stack, arf_stack_size);
        --arf_stack_size;
        break;
      case USE_BUF_FRAME:
        // The current ARF becomes last and the previous ARF is restored.
        lst_index = alt_index;
        alt_index = stack_pop(arf_index_stack, arf_stack_size);
        --arf_stack_size;
        break;
      default: break;
    }

    ++*tpl_group_frames;

    // The length of group of pictures is baseline_gf_interval, plus the
    // beginning golden frame from last GOP, plus the last overlay frame in
    // the same GOP.
    if (frame_idx == gf_group->gf_group_size) break;
  }

  // Frames appended beyond the group have no alt ref; continue with the next
  // list slot and the next lookahead position.
  alt_index = -1;
  ++frame_idx;
  ++frame_gop_offset;

  // Extend two frames outside the current gf group.
  for (; frame_idx < MAX_LAG_BUFFERS && extend_frame_count < 2; ++frame_idx) {
    struct lookahead_entry *buf =
        vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);

    if (buf == NULL) break;

    cpi->tpl_stats[frame_idx].base_qindex = pframe_qindex;

    gf_picture[frame_idx].frame = &buf->img;
    gf_picture[frame_idx].ref_frame[0] = gld_index;
    gf_picture[frame_idx].ref_frame[1] = lst_index;
    gf_picture[frame_idx].ref_frame[2] = alt_index;
    gf_picture[frame_idx].update_type = LF_UPDATE;
    lst_index = frame_idx;
    ++*tpl_group_frames;
    ++extend_frame_count;
    ++frame_gop_offset;
  }
}
5855
5856 static void init_tpl_stats(VP9_COMP *cpi) {
5857   int frame_idx;
5858   for (frame_idx = 0; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
5859     TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
5860     memset(tpl_frame->tpl_stats_ptr, 0,
5861            tpl_frame->height * tpl_frame->width *
5862                sizeof(*tpl_frame->tpl_stats_ptr));
5863     tpl_frame->is_valid = 0;
5864   }
5865 }
5866
5867 #if CONFIG_NON_GREEDY_MV
5868 static uint32_t full_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
5869                                          MotionField *motion_field,
5870                                          int frame_idx, uint8_t *cur_frame_buf,
5871                                          uint8_t *ref_frame_buf, int stride,
5872                                          BLOCK_SIZE bsize, int mi_row,
5873                                          int mi_col, MV *mv) {
5874   MACROBLOCK *const x = &td->mb;
5875   MACROBLOCKD *const xd = &x->e_mbd;
5876   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
5877   int step_param;
5878   uint32_t bestsme = UINT_MAX;
5879   const MvLimits tmp_mv_limits = x->mv_limits;
5880   // lambda is used to adjust the importance of motion vector consitency.
5881   // TODO(angiebird): Figure out lambda's proper value.
5882   const int lambda = cpi->tpl_stats[frame_idx].lambda;
5883   int_mv nb_full_mvs[NB_MVS_NUM];
5884   int nb_full_mv_num;
5885
5886   MV best_ref_mv1 = { 0, 0 };
5887   MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
5888
5889   best_ref_mv1_full.col = best_ref_mv1.col >> 3;
5890   best_ref_mv1_full.row = best_ref_mv1.row >> 3;
5891
5892   // Setup frame pointers
5893   x->plane[0].src.buf = cur_frame_buf;
5894   x->plane[0].src.stride = stride;
5895   xd->plane[0].pre[0].buf = ref_frame_buf;
5896   xd->plane[0].pre[0].stride = stride;
5897
5898   step_param = mv_sf->reduce_first_step_size;
5899   step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
5900
5901   vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
5902
5903   nb_full_mv_num =
5904       vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs);
5905   vp9_full_pixel_diamond_new(cpi, x, bsize, &best_ref_mv1_full, step_param,
5906                              lambda, 1, nb_full_mvs, nb_full_mv_num, mv);
5907
5908   /* restore UMV window */
5909   x->mv_limits = tmp_mv_limits;
5910
5911   return bestsme;
5912 }
5913
5914 static uint32_t sub_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
5915                                         uint8_t *cur_frame_buf,
5916                                         uint8_t *ref_frame_buf, int stride,
5917                                         BLOCK_SIZE bsize, MV *mv) {
5918   MACROBLOCK *const x = &td->mb;
5919   MACROBLOCKD *const xd = &x->e_mbd;
5920   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
5921   uint32_t bestsme = UINT_MAX;
5922   uint32_t distortion;
5923   uint32_t sse;
5924   int cost_list[5];
5925
5926   MV best_ref_mv1 = { 0, 0 };
5927
5928   // Setup frame pointers
5929   x->plane[0].src.buf = cur_frame_buf;
5930   x->plane[0].src.stride = stride;
5931   xd->plane[0].pre[0].buf = ref_frame_buf;
5932   xd->plane[0].pre[0].stride = stride;
5933
5934   // TODO(yunqing): may use higher tap interp filter than 2 taps.
5935   // Ignore mv costing by sending NULL pointer instead of cost array
5936   bestsme = cpi->find_fractional_mv_step(
5937       x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
5938       &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
5939       cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
5940       USE_2_TAPS);
5941
5942   return bestsme;
5943 }
5944
5945 #else  // CONFIG_NON_GREEDY_MV
5946 static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
5947                                               uint8_t *cur_frame_buf,
5948                                               uint8_t *ref_frame_buf,
5949                                               int stride, BLOCK_SIZE bsize,
5950                                               MV *mv) {
5951   MACROBLOCK *const x = &td->mb;
5952   MACROBLOCKD *const xd = &x->e_mbd;
5953   MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
5954   const SEARCH_METHODS search_method = NSTEP;
5955   int step_param;
5956   int sadpb = x->sadperbit16;
5957   uint32_t bestsme = UINT_MAX;
5958   uint32_t distortion;
5959   uint32_t sse;
5960   int cost_list[5];
5961   const MvLimits tmp_mv_limits = x->mv_limits;
5962
5963   MV best_ref_mv1 = { 0, 0 };
5964   MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
5965
5966   best_ref_mv1_full.col = best_ref_mv1.col >> 3;
5967   best_ref_mv1_full.row = best_ref_mv1.row >> 3;
5968
5969   // Setup frame pointers
5970   x->plane[0].src.buf = cur_frame_buf;
5971   x->plane[0].src.stride = stride;
5972   xd->plane[0].pre[0].buf = ref_frame_buf;
5973   xd->plane[0].pre[0].stride = stride;
5974
5975   step_param = mv_sf->reduce_first_step_size;
5976   step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
5977
5978   vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
5979
5980   vp9_full_pixel_search(cpi, x, bsize, &best_ref_mv1_full, step_param,
5981                         search_method, sadpb, cond_cost_list(cpi, cost_list),
5982                         &best_ref_mv1, mv, 0, 0);
5983
5984   /* restore UMV window */
5985   x->mv_limits = tmp_mv_limits;
5986
5987   // TODO(yunqing): may use higher tap interp filter than 2 taps.
5988   // Ignore mv costing by sending NULL pointer instead of cost array
5989   bestsme = cpi->find_fractional_mv_step(
5990       x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
5991       &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
5992       cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
5993       USE_2_TAPS);
5994
5995   return bestsme;
5996 }
5997 #endif
5998
5999 static int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row,
6000                             int ref_pos_col, int block, BLOCK_SIZE bsize) {
6001   int width = 0, height = 0;
6002   int bw = 4 << b_width_log2_lookup[bsize];
6003   int bh = 4 << b_height_log2_lookup[bsize];
6004
6005   switch (block) {
6006     case 0:
6007       width = grid_pos_col + bw - ref_pos_col;
6008       height = grid_pos_row + bh - ref_pos_row;
6009       break;
6010     case 1:
6011       width = ref_pos_col + bw - grid_pos_col;
6012       height = grid_pos_row + bh - ref_pos_row;
6013       break;
6014     case 2:
6015       width = grid_pos_col + bw - ref_pos_col;
6016       height = ref_pos_row + bh - grid_pos_row;
6017       break;
6018     case 3:
6019       width = ref_pos_col + bw - grid_pos_col;
6020       height = ref_pos_row + bh - grid_pos_row;
6021       break;
6022     default: assert(0);
6023   }
6024
6025   return width * height;
6026 }
6027
// Divides |ref_pos| by |bsize_pix|, rounding toward negative infinity rather
// than toward zero (for a positive |bsize_pix|).
static int round_floor(int ref_pos, int bsize_pix) {
  if (ref_pos >= 0) return ref_pos / bsize_pix;

  {
    // For negative positions, shift by one so that exact multiples land in
    // the right bucket, then mirror the quotient.
    const int dist = -ref_pos - 1;
    return -(1 + dist / bsize_pix);
  }
}
6037
6038 static void tpl_model_store(TplDepStats *tpl_stats, int mi_row, int mi_col,
6039                             BLOCK_SIZE bsize, int stride) {
6040   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6041   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6042   const TplDepStats *src_stats = &tpl_stats[mi_row * stride + mi_col];
6043   int idx, idy;
6044
6045   for (idy = 0; idy < mi_height; ++idy) {
6046     for (idx = 0; idx < mi_width; ++idx) {
6047       TplDepStats *tpl_ptr = &tpl_stats[(mi_row + idy) * stride + mi_col + idx];
6048       const int64_t mc_flow = tpl_ptr->mc_flow;
6049       const int64_t mc_ref_cost = tpl_ptr->mc_ref_cost;
6050       *tpl_ptr = *src_stats;
6051       tpl_ptr->mc_flow = mc_flow;
6052       tpl_ptr->mc_ref_cost = mc_ref_cost;
6053       tpl_ptr->mc_dep_cost = tpl_ptr->intra_cost + tpl_ptr->mc_flow;
6054     }
6055   }
6056 }
6057
6058 static void tpl_model_update_b(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
6059                                int mi_row, int mi_col, const BLOCK_SIZE bsize) {
6060   TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index];
6061   TplDepStats *ref_stats = ref_tpl_frame->tpl_stats_ptr;
6062   MV mv = tpl_stats->mv.as_mv;
6063   int mv_row = mv.row >> 3;
6064   int mv_col = mv.col >> 3;
6065
6066   int ref_pos_row = mi_row * MI_SIZE + mv_row;
6067   int ref_pos_col = mi_col * MI_SIZE + mv_col;
6068
6069   const int bw = 4 << b_width_log2_lookup[bsize];
6070   const int bh = 4 << b_height_log2_lookup[bsize];
6071   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6072   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6073   const int pix_num = bw * bh;
6074
6075   // top-left on grid block location in pixel
6076   int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh;
6077   int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw;
6078   int block;
6079
6080   for (block = 0; block < 4; ++block) {
6081     int grid_pos_row = grid_pos_row_base + bh * (block >> 1);
6082     int grid_pos_col = grid_pos_col_base + bw * (block & 0x01);
6083
6084     if (grid_pos_row >= 0 && grid_pos_row < ref_tpl_frame->mi_rows * MI_SIZE &&
6085         grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) {
6086       int overlap_area = get_overlap_area(
6087           grid_pos_row, grid_pos_col, ref_pos_row, ref_pos_col, block, bsize);
6088       int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height;
6089       int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width;
6090
6091       int64_t mc_flow = tpl_stats->mc_dep_cost -
6092                         (tpl_stats->mc_dep_cost * tpl_stats->inter_cost) /
6093                             tpl_stats->intra_cost;
6094
6095       int idx, idy;
6096
6097       for (idy = 0; idy < mi_height; ++idy) {
6098         for (idx = 0; idx < mi_width; ++idx) {
6099           TplDepStats *des_stats =
6100               &ref_stats[(ref_mi_row + idy) * ref_tpl_frame->stride +
6101                          (ref_mi_col + idx)];
6102
6103           des_stats->mc_flow += (mc_flow * overlap_area) / pix_num;
6104           des_stats->mc_ref_cost +=
6105               ((tpl_stats->intra_cost - tpl_stats->inter_cost) * overlap_area) /
6106               pix_num;
6107           assert(overlap_area >= 0);
6108         }
6109       }
6110     }
6111   }
6112 }
6113
6114 static void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
6115                              int mi_row, int mi_col, const BLOCK_SIZE bsize) {
6116   int idx, idy;
6117   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6118   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6119
6120   for (idy = 0; idy < mi_height; ++idy) {
6121     for (idx = 0; idx < mi_width; ++idx) {
6122       TplDepStats *tpl_ptr =
6123           &tpl_stats[(mi_row + idy) * tpl_frame->stride + (mi_col + idx)];
6124       tpl_model_update_b(tpl_frame, tpl_ptr, mi_row + idy, mi_col + idx,
6125                          BLOCK_8X8);
6126     }
6127   }
6128 }
6129
6130 static void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff,
6131                                tran_low_t *qcoeff, tran_low_t *dqcoeff,
6132                                TX_SIZE tx_size, int64_t *recon_error,
6133                                int64_t *sse) {
6134   MACROBLOCKD *const xd = &x->e_mbd;
6135   const struct macroblock_plane *const p = &x->plane[plane];
6136   const struct macroblockd_plane *const pd = &xd->plane[plane];
6137   const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
6138   uint16_t eob;
6139   int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
6140   const int shift = tx_size == TX_32X32 ? 0 : 2;
6141
6142 #if CONFIG_VP9_HIGHBITDEPTH
6143   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6144     vp9_highbd_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp,
6145                                  p->quant_fp, qcoeff, dqcoeff, pd->dequant,
6146                                  &eob, scan_order->scan, scan_order->iscan);
6147   } else {
6148     vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp,
6149                           p->quant_fp, qcoeff, dqcoeff, pd->dequant, &eob,
6150                           scan_order->scan, scan_order->iscan);
6151   }
6152 #else
6153   vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, p->quant_fp,
6154                         qcoeff, dqcoeff, pd->dequant, &eob, scan_order->scan,
6155                         scan_order->iscan);
6156 #endif  // CONFIG_VP9_HIGHBITDEPTH
6157
6158   *recon_error = vp9_block_error(coeff, dqcoeff, pix_num, sse) >> shift;
6159   *recon_error = VPXMAX(*recon_error, 1);
6160
6161   *sse = (*sse) >> shift;
6162   *sse = VPXMAX(*sse, 1);
6163 }
6164
6165 #if CONFIG_VP9_HIGHBITDEPTH
6166 void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
6167                          TX_SIZE tx_size) {
6168   // TODO(sdeng): Implement SIMD based high bit-depth Hadamard transforms.
6169   switch (tx_size) {
6170     case TX_8X8: vpx_highbd_hadamard_8x8(src_diff, bw, coeff); break;
6171     case TX_16X16: vpx_highbd_hadamard_16x16(src_diff, bw, coeff); break;
6172     case TX_32X32: vpx_highbd_hadamard_32x32(src_diff, bw, coeff); break;
6173     default: assert(0);
6174   }
6175 }
6176 #endif  // CONFIG_VP9_HIGHBITDEPTH
6177
6178 void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
6179                   TX_SIZE tx_size) {
6180   switch (tx_size) {
6181     case TX_8X8: vpx_hadamard_8x8(src_diff, bw, coeff); break;
6182     case TX_16X16: vpx_hadamard_16x16(src_diff, bw, coeff); break;
6183     case TX_32X32: vpx_hadamard_32x32(src_diff, bw, coeff); break;
6184     default: assert(0);
6185   }
6186 }
6187
6188 static void set_mv_limits(const VP9_COMMON *cm, MACROBLOCK *x, int mi_row,
6189                           int mi_col) {
6190   x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
6191   x->mv_limits.row_max =
6192       (cm->mi_rows - 1 - mi_row) * MI_SIZE + (17 - 2 * VP9_INTERP_EXTEND);
6193   x->mv_limits.col_min = -((mi_col * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
6194   x->mv_limits.col_max =
6195       ((cm->mi_cols - 1 - mi_col) * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND);
6196 }
6197
6198 static void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
6199                             struct scale_factors *sf, GF_PICTURE *gf_picture,
6200                             int frame_idx, TplDepFrame *tpl_frame,
6201                             int16_t *src_diff, tran_low_t *coeff,
6202                             tran_low_t *qcoeff, tran_low_t *dqcoeff, int mi_row,
6203                             int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size,
6204                             YV12_BUFFER_CONFIG *ref_frame[], uint8_t *predictor,
6205                             int64_t *recon_error, int64_t *sse) {
6206   VP9_COMMON *cm = &cpi->common;
6207   ThreadData *td = &cpi->td;
6208
6209   const int bw = 4 << b_width_log2_lookup[bsize];
6210   const int bh = 4 << b_height_log2_lookup[bsize];
6211   const int pix_num = bw * bh;
6212   int best_rf_idx = -1;
6213   int_mv best_mv;
6214   int64_t best_inter_cost = INT64_MAX;
6215   int64_t inter_cost;
6216   int rf_idx;
6217   const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP];
6218
6219   int64_t best_intra_cost = INT64_MAX;
6220   int64_t intra_cost;
6221   PREDICTION_MODE mode;
6222   int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
6223   MODE_INFO mi_above, mi_left;
6224   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6225   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6226   TplDepStats *tpl_stats =
6227       &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
6228
6229   xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
6230   xd->mb_to_bottom_edge = ((cm->mi_rows - 1 - mi_row) * MI_SIZE) * 8;
6231   xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
6232   xd->mb_to_right_edge = ((cm->mi_cols - 1 - mi_col) * MI_SIZE) * 8;
6233   xd->above_mi = (mi_row > 0) ? &mi_above : NULL;
6234   xd->left_mi = (mi_col > 0) ? &mi_left : NULL;
6235
6236   // Intra prediction search
6237   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
6238     uint8_t *src, *dst;
6239     int src_stride, dst_stride;
6240
6241     src = xd->cur_buf->y_buffer + mb_y_offset;
6242     src_stride = xd->cur_buf->y_stride;
6243
6244     dst = &predictor[0];
6245     dst_stride = bw;
6246
6247     xd->mi[0]->sb_type = bsize;
6248     xd->mi[0]->ref_frame[0] = INTRA_FRAME;
6249
6250     vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode, src,
6251                             src_stride, dst, dst_stride, 0, 0, 0);
6252
6253 #if CONFIG_VP9_HIGHBITDEPTH
6254     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6255       vpx_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
6256                                 dst_stride, xd->bd);
6257       highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6258       intra_cost = vpx_highbd_satd(coeff, pix_num);
6259     } else {
6260       vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
6261                          dst_stride);
6262       wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6263       intra_cost = vpx_satd(coeff, pix_num);
6264     }
6265 #else
6266     vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride);
6267     wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6268     intra_cost = vpx_satd(coeff, pix_num);
6269 #endif  // CONFIG_VP9_HIGHBITDEPTH
6270
6271     if (intra_cost < best_intra_cost) best_intra_cost = intra_cost;
6272   }
6273
6274   // Motion compensated prediction
6275   best_mv.as_int = 0;
6276
6277   set_mv_limits(cm, x, mi_row, mi_col);
6278
6279   for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
6280     int_mv mv;
6281 #if CONFIG_NON_GREEDY_MV
6282     MotionField *motion_field;
6283 #endif
6284     if (ref_frame[rf_idx] == NULL) continue;
6285
6286 #if CONFIG_NON_GREEDY_MV
6287     (void)td;
6288     motion_field = vp9_motion_field_info_get_motion_field(
6289         &cpi->motion_field_info, frame_idx, rf_idx, bsize);
6290     mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
6291 #else
6292     motion_compensated_prediction(cpi, td, xd->cur_buf->y_buffer + mb_y_offset,
6293                                   ref_frame[rf_idx]->y_buffer + mb_y_offset,
6294                                   xd->cur_buf->y_stride, bsize, &mv.as_mv);
6295 #endif
6296
6297 #if CONFIG_VP9_HIGHBITDEPTH
6298     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6299       vp9_highbd_build_inter_predictor(
6300           CONVERT_TO_SHORTPTR(ref_frame[rf_idx]->y_buffer + mb_y_offset),
6301           ref_frame[rf_idx]->y_stride, CONVERT_TO_SHORTPTR(&predictor[0]), bw,
6302           &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE,
6303           mi_row * MI_SIZE, xd->bd);
6304       vpx_highbd_subtract_block(
6305           bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset,
6306           xd->cur_buf->y_stride, &predictor[0], bw, xd->bd);
6307       highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6308       inter_cost = vpx_highbd_satd(coeff, pix_num);
6309     } else {
6310       vp9_build_inter_predictor(
6311           ref_frame[rf_idx]->y_buffer + mb_y_offset,
6312           ref_frame[rf_idx]->y_stride, &predictor[0], bw, &mv.as_mv, sf, bw, bh,
6313           0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE);
6314       vpx_subtract_block(bh, bw, src_diff, bw,
6315                          xd->cur_buf->y_buffer + mb_y_offset,
6316                          xd->cur_buf->y_stride, &predictor[0], bw);
6317       wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6318       inter_cost = vpx_satd(coeff, pix_num);
6319     }
6320 #else
6321     vp9_build_inter_predictor(ref_frame[rf_idx]->y_buffer + mb_y_offset,
6322                               ref_frame[rf_idx]->y_stride, &predictor[0], bw,
6323                               &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3,
6324                               mi_col * MI_SIZE, mi_row * MI_SIZE);
6325     vpx_subtract_block(bh, bw, src_diff, bw,
6326                        xd->cur_buf->y_buffer + mb_y_offset,
6327                        xd->cur_buf->y_stride, &predictor[0], bw);
6328     wht_fwd_txfm(src_diff, bw, coeff, tx_size);
6329     inter_cost = vpx_satd(coeff, pix_num);
6330 #endif
6331
6332     if (inter_cost < best_inter_cost) {
6333       best_rf_idx = rf_idx;
6334       best_inter_cost = inter_cost;
6335       best_mv.as_int = mv.as_int;
6336       get_quantize_error(x, 0, coeff, qcoeff, dqcoeff, tx_size, recon_error,
6337                          sse);
6338     }
6339   }
6340   best_intra_cost = VPXMAX(best_intra_cost, 1);
6341   best_inter_cost = VPXMIN(best_intra_cost, best_inter_cost);
6342   tpl_stats->inter_cost = VPXMAX(
6343       1, (best_inter_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
6344   tpl_stats->intra_cost = VPXMAX(
6345       1, (best_intra_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
6346   tpl_stats->ref_frame_index = gf_picture[frame_idx].ref_frame[best_rf_idx];
6347   tpl_stats->mv.as_int = best_mv.as_int;
6348 }
6349
6350 #if CONFIG_NON_GREEDY_MV
6351 static int get_block_src_pred_buf(MACROBLOCKD *xd, GF_PICTURE *gf_picture,
6352                                   int frame_idx, int rf_idx, int mi_row,
6353                                   int mi_col, struct buf_2d *src,
6354                                   struct buf_2d *pre) {
6355   const int mb_y_offset =
6356       mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
6357   YV12_BUFFER_CONFIG *ref_frame = NULL;
6358   int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
6359   if (ref_frame_idx != -1) {
6360     ref_frame = gf_picture[ref_frame_idx].frame;
6361     src->buf = xd->cur_buf->y_buffer + mb_y_offset;
6362     src->stride = xd->cur_buf->y_stride;
6363     pre->buf = ref_frame->y_buffer + mb_y_offset;
6364     pre->stride = ref_frame->y_stride;
6365     assert(src->stride == pre->stride);
6366     return 1;
6367   } else {
6368     printf("invalid ref_frame_idx");
6369     assert(ref_frame_idx != -1);
6370     return 0;
6371   }
6372 }
6373
6374 #define kMvPreCheckLines 5
6375 #define kMvPreCheckSize 15
6376
6377 #define MV_REF_POS_NUM 3
6378 POSITION mv_ref_pos[MV_REF_POS_NUM] = {
6379   { -1, 0 },
6380   { 0, -1 },
6381   { -1, -1 },
6382 };
6383
6384 static int_mv *get_select_mv(VP9_COMP *cpi, TplDepFrame *tpl_frame, int mi_row,
6385                              int mi_col) {
6386   return &cpi->select_mv_arr[mi_row * tpl_frame->stride + mi_col];
6387 }
6388
6389 static int_mv find_ref_mv(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame,
6390                           BLOCK_SIZE bsize, int mi_row, int mi_col) {
6391   int i;
6392   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6393   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6394   int_mv nearest_mv, near_mv, invalid_mv;
6395   nearest_mv.as_int = INVALID_MV;
6396   near_mv.as_int = INVALID_MV;
6397   invalid_mv.as_int = INVALID_MV;
6398   for (i = 0; i < MV_REF_POS_NUM; ++i) {
6399     int nb_row = mi_row + mv_ref_pos[i].row * mi_height;
6400     int nb_col = mi_col + mv_ref_pos[i].col * mi_width;
6401     assert(mv_ref_pos[i].row <= 0);
6402     assert(mv_ref_pos[i].col <= 0);
6403     if (nb_row >= 0 && nb_col >= 0) {
6404       if (nearest_mv.as_int == INVALID_MV) {
6405         nearest_mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
6406       } else {
6407         int_mv mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
6408         if (mv.as_int == nearest_mv.as_int) {
6409           continue;
6410         } else {
6411           near_mv = mv;
6412           break;
6413         }
6414       }
6415     }
6416   }
6417   if (nearest_mv.as_int == INVALID_MV) {
6418     nearest_mv.as_mv.row = 0;
6419     nearest_mv.as_mv.col = 0;
6420   }
6421   if (near_mv.as_int == INVALID_MV) {
6422     near_mv.as_mv.row = 0;
6423     near_mv.as_mv.col = 0;
6424   }
6425   if (mv_mode == NEAREST_MV_MODE) {
6426     return nearest_mv;
6427   }
6428   if (mv_mode == NEAR_MV_MODE) {
6429     return near_mv;
6430   }
6431   assert(0);
6432   return invalid_mv;
6433 }
6434
6435 static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi,
6436                                   MotionField *motion_field,
6437                                   TplDepFrame *tpl_frame, BLOCK_SIZE bsize,
6438                                   int mi_row, int mi_col) {
6439   int_mv mv;
6440   switch (mv_mode) {
6441     case ZERO_MV_MODE:
6442       mv.as_mv.row = 0;
6443       mv.as_mv.col = 0;
6444       break;
6445     case NEW_MV_MODE:
6446       mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
6447       break;
6448     case NEAREST_MV_MODE:
6449       mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
6450       break;
6451     case NEAR_MV_MODE:
6452       mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
6453       break;
6454     default:
6455       mv.as_int = INVALID_MV;
6456       assert(0);
6457       break;
6458   }
6459   return mv;
6460 }
6461
6462 static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd,
6463                           GF_PICTURE *gf_picture, MotionField *motion_field,
6464                           int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
6465                           BLOCK_SIZE bsize, int mi_row, int mi_col,
6466                           int_mv *mv) {
6467   uint32_t sse;
6468   struct buf_2d src;
6469   struct buf_2d pre;
6470   MV full_mv;
6471   *mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame, bsize,
6472                             mi_row, mi_col);
6473   full_mv = get_full_mv(&mv->as_mv);
6474   if (get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row, mi_col,
6475                              &src, &pre)) {
6476     // TODO(angiebird): Consider subpixel when computing the sse.
6477     cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv),
6478                           pre.stride, &sse);
6479     return (double)(sse << VP9_DIST_SCALE_LOG2);
6480   } else {
6481     assert(0);
6482     return 0;
6483   }
6484 }
6485
6486 static int get_mv_mode_cost(int mv_mode) {
6487   // TODO(angiebird): The probabilities are roughly inferred from
6488   // default_inter_mode_probs. Check if there is a better way to set the
6489   // probabilities.
6490   const int zero_mv_prob = 16;
6491   const int new_mv_prob = 24 * 1;
6492   const int ref_mv_prob = 256 - zero_mv_prob - new_mv_prob;
6493   assert(zero_mv_prob + new_mv_prob + ref_mv_prob == 256);
6494   switch (mv_mode) {
6495     case ZERO_MV_MODE: return vp9_prob_cost[zero_mv_prob]; break;
6496     case NEW_MV_MODE: return vp9_prob_cost[new_mv_prob]; break;
6497     case NEAREST_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
6498     case NEAR_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
6499     default: assert(0); return -1;
6500   }
6501 }
6502
6503 static INLINE double get_mv_diff_cost(MV *new_mv, MV *ref_mv) {
6504   double mv_diff_cost = log2(1 + abs(new_mv->row - ref_mv->row)) +
6505                         log2(1 + abs(new_mv->col - ref_mv->col));
6506   mv_diff_cost *= (1 << VP9_PROB_COST_SHIFT);
6507   return mv_diff_cost;
6508 }
6509 static double get_mv_cost(int mv_mode, VP9_COMP *cpi, MotionField *motion_field,
6510                           TplDepFrame *tpl_frame, BLOCK_SIZE bsize, int mi_row,
6511                           int mi_col) {
6512   double mv_cost = get_mv_mode_cost(mv_mode);
6513   if (mv_mode == NEW_MV_MODE) {
6514     MV new_mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame,
6515                                     bsize, mi_row, mi_col)
6516                     .as_mv;
6517     MV nearest_mv = get_mv_from_mv_mode(NEAREST_MV_MODE, cpi, motion_field,
6518                                         tpl_frame, bsize, mi_row, mi_col)
6519                         .as_mv;
6520     MV near_mv = get_mv_from_mv_mode(NEAR_MV_MODE, cpi, motion_field, tpl_frame,
6521                                      bsize, mi_row, mi_col)
6522                      .as_mv;
6523     double nearest_cost = get_mv_diff_cost(&new_mv, &nearest_mv);
6524     double near_cost = get_mv_diff_cost(&new_mv, &near_mv);
6525     mv_cost += nearest_cost < near_cost ? nearest_cost : near_cost;
6526   }
6527   return mv_cost;
6528 }
6529
6530 static double eval_mv_mode(int mv_mode, VP9_COMP *cpi, MACROBLOCK *x,
6531                            GF_PICTURE *gf_picture, MotionField *motion_field,
6532                            int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
6533                            BLOCK_SIZE bsize, int mi_row, int mi_col,
6534                            int_mv *mv) {
6535   MACROBLOCKD *xd = &x->e_mbd;
6536   double mv_dist =
6537       get_mv_dist(mv_mode, cpi, xd, gf_picture, motion_field, frame_idx,
6538                   tpl_frame, rf_idx, bsize, mi_row, mi_col, mv);
6539   double mv_cost =
6540       get_mv_cost(mv_mode, cpi, motion_field, tpl_frame, bsize, mi_row, mi_col);
6541   double mult = 180;
6542
6543   return mv_cost + mult * log2f(1 + mv_dist);
6544 }
6545
6546 static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
6547                                  GF_PICTURE *gf_picture,
6548                                  MotionField *motion_field, int frame_idx,
6549                                  TplDepFrame *tpl_frame, int rf_idx,
6550                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
6551                                  double *rd, int_mv *mv) {
6552   int best_mv_mode = ZERO_MV_MODE;
6553   int update = 0;
6554   int mv_mode;
6555   *rd = 0;
6556   for (mv_mode = 0; mv_mode < MAX_MV_MODE; ++mv_mode) {
6557     double this_rd;
6558     int_mv this_mv;
6559     if (mv_mode == NEW_MV_MODE) {
6560       continue;
6561     }
6562     this_rd = eval_mv_mode(mv_mode, cpi, x, gf_picture, motion_field, frame_idx,
6563                            tpl_frame, rf_idx, bsize, mi_row, mi_col, &this_mv);
6564     if (update == 0) {
6565       *rd = this_rd;
6566       *mv = this_mv;
6567       best_mv_mode = mv_mode;
6568       update = 1;
6569     } else {
6570       if (this_rd < *rd) {
6571         *rd = this_rd;
6572         *mv = this_mv;
6573         best_mv_mode = mv_mode;
6574       }
6575     }
6576   }
6577   return best_mv_mode;
6578 }
6579
6580 static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
6581                             GF_PICTURE *gf_picture, MotionField *motion_field,
6582                             int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
6583                             BLOCK_SIZE bsize, int mi_row, int mi_col) {
6584   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6585   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6586   int tmp_mv_mode_arr[kMvPreCheckSize];
6587   int *mv_mode_arr = tpl_frame->mv_mode_arr[rf_idx];
6588   double *rd_diff_arr = tpl_frame->rd_diff_arr[rf_idx];
6589   int_mv *select_mv_arr = cpi->select_mv_arr;
6590   int_mv tmp_select_mv_arr[kMvPreCheckSize];
6591   int stride = tpl_frame->stride;
6592   double new_mv_rd = 0;
6593   double no_new_mv_rd = 0;
6594   double this_new_mv_rd = 0;
6595   double this_no_new_mv_rd = 0;
6596   int idx;
6597   int tmp_idx;
6598   assert(kMvPreCheckSize == (kMvPreCheckLines * (kMvPreCheckLines + 1)) >> 1);
6599
6600   // no new mv
6601   // diagnal scan order
6602   tmp_idx = 0;
6603   for (idx = 0; idx < kMvPreCheckLines; ++idx) {
6604     int r;
6605     for (r = 0; r <= idx; ++r) {
6606       int c = idx - r;
6607       int nb_row = mi_row + r * mi_height;
6608       int nb_col = mi_col + c * mi_width;
6609       if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
6610         double this_rd;
6611         int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
6612         mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
6613             cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
6614             bsize, nb_row, nb_col, &this_rd, mv);
6615         if (r == 0 && c == 0) {
6616           this_no_new_mv_rd = this_rd;
6617         }
6618         no_new_mv_rd += this_rd;
6619         tmp_mv_mode_arr[tmp_idx] = mv_mode_arr[nb_row * stride + nb_col];
6620         tmp_select_mv_arr[tmp_idx] = select_mv_arr[nb_row * stride + nb_col];
6621         ++tmp_idx;
6622       }
6623     }
6624   }
6625
6626   // new mv
6627   mv_mode_arr[mi_row * stride + mi_col] = NEW_MV_MODE;
6628   this_new_mv_rd = eval_mv_mode(
6629       NEW_MV_MODE, cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
6630       rf_idx, bsize, mi_row, mi_col, &select_mv_arr[mi_row * stride + mi_col]);
6631   new_mv_rd = this_new_mv_rd;
6632   // We start from idx = 1 because idx = 0 is evaluated as NEW_MV_MODE
6633   // beforehand.
6634   for (idx = 1; idx < kMvPreCheckLines; ++idx) {
6635     int r;
6636     for (r = 0; r <= idx; ++r) {
6637       int c = idx - r;
6638       int nb_row = mi_row + r * mi_height;
6639       int nb_col = mi_col + c * mi_width;
6640       if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
6641         double this_rd;
6642         int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
6643         mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
6644             cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
6645             bsize, nb_row, nb_col, &this_rd, mv);
6646         new_mv_rd += this_rd;
6647       }
6648     }
6649   }
6650
6651   // update best_mv_mode
6652   tmp_idx = 0;
6653   if (no_new_mv_rd < new_mv_rd) {
6654     for (idx = 0; idx < kMvPreCheckLines; ++idx) {
6655       int r;
6656       for (r = 0; r <= idx; ++r) {
6657         int c = idx - r;
6658         int nb_row = mi_row + r * mi_height;
6659         int nb_col = mi_col + c * mi_width;
6660         if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
6661           mv_mode_arr[nb_row * stride + nb_col] = tmp_mv_mode_arr[tmp_idx];
6662           select_mv_arr[nb_row * stride + nb_col] = tmp_select_mv_arr[tmp_idx];
6663           ++tmp_idx;
6664         }
6665       }
6666     }
6667     rd_diff_arr[mi_row * stride + mi_col] = 0;
6668   } else {
6669     rd_diff_arr[mi_row * stride + mi_col] =
6670         (no_new_mv_rd - this_no_new_mv_rd) - (new_mv_rd - this_new_mv_rd);
6671   }
6672 }
6673
6674 static void predict_mv_mode_arr(VP9_COMP *cpi, MACROBLOCK *x,
6675                                 GF_PICTURE *gf_picture,
6676                                 MotionField *motion_field, int frame_idx,
6677                                 TplDepFrame *tpl_frame, int rf_idx,
6678                                 BLOCK_SIZE bsize) {
6679   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6680   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6681   const int unit_rows = tpl_frame->mi_rows / mi_height;
6682   const int unit_cols = tpl_frame->mi_cols / mi_width;
6683   const int max_diagonal_lines = unit_rows + unit_cols - 1;
6684   int idx;
6685   for (idx = 0; idx < max_diagonal_lines; ++idx) {
6686     int r;
6687     for (r = VPXMAX(idx - unit_cols + 1, 0); r <= VPXMIN(idx, unit_rows - 1);
6688          ++r) {
6689       int c = idx - r;
6690       int mi_row = r * mi_height;
6691       int mi_col = c * mi_width;
6692       assert(c >= 0 && c < unit_cols);
6693       assert(mi_row >= 0 && mi_row < tpl_frame->mi_rows);
6694       assert(mi_col >= 0 && mi_col < tpl_frame->mi_cols);
6695       predict_mv_mode(cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
6696                       rf_idx, bsize, mi_row, mi_col);
6697     }
6698   }
6699 }
6700
6701 static void do_motion_search(VP9_COMP *cpi, ThreadData *td,
6702                              MotionField *motion_field, int frame_idx,
6703                              YV12_BUFFER_CONFIG *ref_frame, BLOCK_SIZE bsize,
6704                              int mi_row, int mi_col) {
6705   VP9_COMMON *cm = &cpi->common;
6706   MACROBLOCK *x = &td->mb;
6707   MACROBLOCKD *xd = &x->e_mbd;
6708   const int mb_y_offset =
6709       mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
6710   assert(ref_frame != NULL);
6711   set_mv_limits(cm, x, mi_row, mi_col);
6712   {
6713     int_mv mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
6714     uint8_t *cur_frame_buf = xd->cur_buf->y_buffer + mb_y_offset;
6715     uint8_t *ref_frame_buf = ref_frame->y_buffer + mb_y_offset;
6716     const int stride = xd->cur_buf->y_stride;
6717     full_pixel_motion_search(cpi, td, motion_field, frame_idx, cur_frame_buf,
6718                              ref_frame_buf, stride, bsize, mi_row, mi_col,
6719                              &mv.as_mv);
6720     sub_pixel_motion_search(cpi, td, cur_frame_buf, ref_frame_buf, stride,
6721                             bsize, &mv.as_mv);
6722     vp9_motion_field_mi_set_mv(motion_field, mi_row, mi_col, mv);
6723   }
6724 }
6725
6726 static void build_motion_field(
6727     VP9_COMP *cpi, int frame_idx,
6728     YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES], BLOCK_SIZE bsize) {
6729   VP9_COMMON *cm = &cpi->common;
6730   ThreadData *td = &cpi->td;
6731   TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
6732   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6733   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6734   const int pw = num_4x4_blocks_wide_lookup[bsize] << 2;
6735   const int ph = num_4x4_blocks_high_lookup[bsize] << 2;
6736   int mi_row, mi_col;
6737   int rf_idx;
6738
6739   tpl_frame->lambda = (pw * ph) >> 2;
6740   assert(pw * ph == tpl_frame->lambda << 2);
6741
6742   for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
6743     MotionField *motion_field = vp9_motion_field_info_get_motion_field(
6744         &cpi->motion_field_info, frame_idx, rf_idx, bsize);
6745     if (ref_frame[rf_idx] == NULL) {
6746       continue;
6747     }
6748     vp9_motion_field_reset_mvs(motion_field);
6749     for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
6750       for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
6751         do_motion_search(cpi, td, motion_field, frame_idx, ref_frame[rf_idx],
6752                          bsize, mi_row, mi_col);
6753       }
6754     }
6755   }
6756 }
6757 #endif  // CONFIG_NON_GREEDY_MV
6758
6759 static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture,
6760                               int frame_idx, BLOCK_SIZE bsize) {
6761   TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
6762   YV12_BUFFER_CONFIG *this_frame = gf_picture[frame_idx].frame;
6763   YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES] = { NULL, NULL, NULL };
6764
6765   VP9_COMMON *cm = &cpi->common;
6766   struct scale_factors sf;
6767   int rdmult, idx;
6768   ThreadData *td = &cpi->td;
6769   MACROBLOCK *x = &td->mb;
6770   MACROBLOCKD *xd = &x->e_mbd;
6771   int mi_row, mi_col;
6772
6773 #if CONFIG_VP9_HIGHBITDEPTH
6774   DECLARE_ALIGNED(16, uint16_t, predictor16[32 * 32 * 3]);
6775   DECLARE_ALIGNED(16, uint8_t, predictor8[32 * 32 * 3]);
6776   uint8_t *predictor;
6777 #else
6778   DECLARE_ALIGNED(16, uint8_t, predictor[32 * 32 * 3]);
6779 #endif
6780   DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
6781   DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
6782   DECLARE_ALIGNED(16, tran_low_t, qcoeff[32 * 32]);
6783   DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
6784
6785   const TX_SIZE tx_size = max_txsize_lookup[bsize];
6786   const int mi_height = num_8x8_blocks_high_lookup[bsize];
6787   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6788   int64_t recon_error, sse;
6789 #if CONFIG_NON_GREEDY_MV
6790   int square_block_idx;
6791   int rf_idx;
6792 #endif
6793
6794   // Setup scaling factor
6795 #if CONFIG_VP9_HIGHBITDEPTH
6796   vp9_setup_scale_factors_for_frame(
6797       &sf, this_frame->y_crop_width, this_frame->y_crop_height,
6798       this_frame->y_crop_width, this_frame->y_crop_height,
6799       cpi->common.use_highbitdepth);
6800
6801   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
6802     predictor = CONVERT_TO_BYTEPTR(predictor16);
6803   else
6804     predictor = predictor8;
6805 #else
6806   vp9_setup_scale_factors_for_frame(
6807       &sf, this_frame->y_crop_width, this_frame->y_crop_height,
6808       this_frame->y_crop_width, this_frame->y_crop_height);
6809 #endif  // CONFIG_VP9_HIGHBITDEPTH
6810
6811   // Prepare reference frame pointers. If any reference frame slot is
6812   // unavailable, the pointer will be set to Null.
6813   for (idx = 0; idx < MAX_INTER_REF_FRAMES; ++idx) {
6814     int rf_idx = gf_picture[frame_idx].ref_frame[idx];
6815     if (rf_idx != -1) ref_frame[idx] = gf_picture[rf_idx].frame;
6816   }
6817
6818   xd->mi = cm->mi_grid_visible;
6819   xd->mi[0] = cm->mi;
6820   xd->cur_buf = this_frame;
6821
6822   // Get rd multiplier set up.
6823   rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, tpl_frame->base_qindex);
6824   set_error_per_bit(&cpi->td.mb, rdmult);
6825   vp9_initialize_me_consts(cpi, &cpi->td.mb, tpl_frame->base_qindex);
6826
6827   tpl_frame->is_valid = 1;
6828
6829   cm->base_qindex = tpl_frame->base_qindex;
6830   vp9_frame_init_quantizer(cpi);
6831
6832 #if CONFIG_NON_GREEDY_MV
6833   for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES;
6834        ++square_block_idx) {
6835     BLOCK_SIZE square_bsize = square_block_idx_to_bsize(square_block_idx);
6836     build_motion_field(cpi, frame_idx, ref_frame, square_bsize);
6837   }
6838   for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
6839     int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
6840     if (ref_frame_idx != -1) {
6841       MotionField *motion_field = vp9_motion_field_info_get_motion_field(
6842           &cpi->motion_field_info, frame_idx, rf_idx, bsize);
6843       predict_mv_mode_arr(cpi, x, gf_picture, motion_field, frame_idx,
6844                           tpl_frame, rf_idx, bsize);
6845     }
6846   }
6847 #endif
6848
6849   for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
6850     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
6851       mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame,
6852                       src_diff, coeff, qcoeff, dqcoeff, mi_row, mi_col, bsize,
6853                       tx_size, ref_frame, predictor, &recon_error, &sse);
6854       // Motion flow dependency dispenser.
6855       tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize,
6856                       tpl_frame->stride);
6857
6858       tpl_model_update(cpi->tpl_stats, tpl_frame->tpl_stats_ptr, mi_row, mi_col,
6859                        bsize);
6860     }
6861   }
6862 }
6863
6864 #if CONFIG_NON_GREEDY_MV
6865 #define DUMP_TPL_STATS 0
6866 #if DUMP_TPL_STATS
// Prints an h x w window of |buf| to stdout: first a "h w" header line, then
// all samples of the window in row-major order, space separated, on a single
// line. The window starts at (row, col) and rows are |stride| samples apart.
static void dump_buf(uint8_t *buf, int stride, int row, int col, int h, int w) {
  int y;
  printf("%d %d\n", h, w);
  for (y = 0; y < h; ++y) {
    const int row_start = (row + y) * stride + col;
    int x;
    for (x = 0; x < w; ++x) {
      printf("%d ", buf[row_start + x]);
    }
  }
  printf("\n");
}
6877
6878 static void dump_frame_buf(const YV12_BUFFER_CONFIG *frame_buf) {
6879   dump_buf(frame_buf->y_buffer, frame_buf->y_stride, 0, 0, frame_buf->y_height,
6880            frame_buf->y_width);
6881   dump_buf(frame_buf->u_buffer, frame_buf->uv_stride, 0, 0,
6882            frame_buf->uv_height, frame_buf->uv_width);
6883   dump_buf(frame_buf->v_buffer, frame_buf->uv_stride, 0, 0,
6884            frame_buf->uv_height, frame_buf->uv_width);
6885 }
6886
6887 static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames,
6888                            const GF_GROUP *gf_group,
6889                            const GF_PICTURE *gf_picture, BLOCK_SIZE bsize) {
6890   int frame_idx;
6891   const VP9_COMMON *cm = &cpi->common;
6892   int rf_idx;
6893   for (frame_idx = 1; frame_idx < tpl_group_frames; ++frame_idx) {
6894     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
6895       const TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
6896       int mi_row, mi_col;
6897       int ref_frame_idx;
6898       const int mi_height = num_8x8_blocks_high_lookup[bsize];
6899       const int mi_width = num_8x8_blocks_wide_lookup[bsize];
6900       ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
6901       if (ref_frame_idx != -1) {
6902         YV12_BUFFER_CONFIG *ref_frame_buf = gf_picture[ref_frame_idx].frame;
6903         const int gf_frame_offset = gf_group->frame_gop_index[frame_idx];
6904         const int ref_gf_frame_offset =
6905             gf_group->frame_gop_index[ref_frame_idx];
6906         printf("=\n");
6907         printf(
6908             "frame_idx %d mi_rows %d mi_cols %d bsize %d ref_frame_idx %d "
6909             "rf_idx %d gf_frame_offset %d ref_gf_frame_offset %d\n",
6910             frame_idx, cm->mi_rows, cm->mi_cols, mi_width * MI_SIZE,
6911             ref_frame_idx, rf_idx, gf_frame_offset, ref_gf_frame_offset);
6912         for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
6913           for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
6914             if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
6915               int_mv mv = vp9_motion_field_info_get_mv(&cpi->motion_field_info,
6916                                                        frame_idx, rf_idx, bsize,
6917                                                        mi_row, mi_col);
6918               printf("%d %d %d %d\n", mi_row, mi_col, mv.as_mv.row,
6919                      mv.as_mv.col);
6920             }
6921           }
6922         }
6923         for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
6924           for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
6925             if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
6926               const TplDepStats *tpl_ptr =
6927                   &tpl_frame
6928                        ->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
6929               printf("%f ", tpl_ptr->feature_score);
6930             }
6931           }
6932         }
6933         printf("\n");
6934
6935         for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
6936           for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
6937             const int mv_mode =
6938                 tpl_frame
6939                     ->mv_mode_arr[rf_idx][mi_row * tpl_frame->stride + mi_col];
6940             printf("%d ", mv_mode);
6941           }
6942         }
6943         printf("\n");
6944
6945         dump_frame_buf(gf_picture[frame_idx].frame);
6946         dump_frame_buf(ref_frame_buf);
6947       }
6948     }
6949   }
6950 }
6951 #endif  // DUMP_TPL_STATS
6952 #endif  // CONFIG_NON_GREEDY_MV
6953
6954 static void init_tpl_buffer(VP9_COMP *cpi) {
6955   VP9_COMMON *cm = &cpi->common;
6956   int frame;
6957
6958   const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
6959   const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
6960 #if CONFIG_NON_GREEDY_MV
6961   int rf_idx;
6962
6963   vpx_free(cpi->select_mv_arr);
6964   CHECK_MEM_ERROR(
6965       cm, cpi->select_mv_arr,
6966       vpx_calloc(mi_rows * mi_cols * 4, sizeof(*cpi->select_mv_arr)));
6967 #endif
6968
6969   // TODO(jingning): Reduce the actual memory use for tpl model build up.
6970   for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
6971     if (cpi->tpl_stats[frame].width >= mi_cols &&
6972         cpi->tpl_stats[frame].height >= mi_rows &&
6973         cpi->tpl_stats[frame].tpl_stats_ptr)
6974       continue;
6975
6976 #if CONFIG_NON_GREEDY_MV
6977     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
6978       vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
6979       CHECK_MEM_ERROR(
6980           cm, cpi->tpl_stats[frame].mv_mode_arr[rf_idx],
6981           vpx_calloc(mi_rows * mi_cols * 4,
6982                      sizeof(*cpi->tpl_stats[frame].mv_mode_arr[rf_idx])));
6983       vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
6984       CHECK_MEM_ERROR(
6985           cm, cpi->tpl_stats[frame].rd_diff_arr[rf_idx],
6986           vpx_calloc(mi_rows * mi_cols * 4,
6987                      sizeof(*cpi->tpl_stats[frame].rd_diff_arr[rf_idx])));
6988     }
6989 #endif
6990     vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
6991     CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr,
6992                     vpx_calloc(mi_rows * mi_cols,
6993                                sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));
6994     cpi->tpl_stats[frame].is_valid = 0;
6995     cpi->tpl_stats[frame].width = mi_cols;
6996     cpi->tpl_stats[frame].height = mi_rows;
6997     cpi->tpl_stats[frame].stride = mi_cols;
6998     cpi->tpl_stats[frame].mi_rows = cm->mi_rows;
6999     cpi->tpl_stats[frame].mi_cols = cm->mi_cols;
7000   }
7001
7002   for (frame = 0; frame < REF_FRAMES; ++frame) {
7003     cpi->enc_frame_buf[frame].mem_valid = 0;
7004     cpi->enc_frame_buf[frame].released = 1;
7005   }
7006 }
7007
7008 static void free_tpl_buffer(VP9_COMP *cpi) {
7009   int frame;
7010 #if CONFIG_NON_GREEDY_MV
7011   vp9_free_motion_field_info(&cpi->motion_field_info);
7012   vpx_free(cpi->select_mv_arr);
7013 #endif
7014   for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
7015 #if CONFIG_NON_GREEDY_MV
7016     int rf_idx;
7017     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
7018       vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
7019       vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
7020     }
7021 #endif
7022     vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
7023     cpi->tpl_stats[frame].is_valid = 0;
7024   }
7025 }
7026
7027 static void setup_tpl_stats(VP9_COMP *cpi) {
7028   GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE];
7029   const GF_GROUP *gf_group = &cpi->twopass.gf_group;
7030   int tpl_group_frames = 0;
7031   int frame_idx;
7032   cpi->tpl_bsize = BLOCK_32X32;
7033
7034   init_gop_frames(cpi, gf_picture, gf_group, &tpl_group_frames);
7035
7036   init_tpl_stats(cpi);
7037
7038   // Backward propagation from tpl_group_frames to 1.
7039   for (frame_idx = tpl_group_frames - 1; frame_idx > 0; --frame_idx) {
7040     if (gf_picture[frame_idx].update_type == USE_BUF_FRAME) continue;
7041     mc_flow_dispenser(cpi, gf_picture, frame_idx, cpi->tpl_bsize);
7042   }
7043 #if CONFIG_NON_GREEDY_MV
7044   cpi->tpl_ready = 1;
7045 #if DUMP_TPL_STATS
7046   dump_tpl_stats(cpi, tpl_group_frames, gf_group, gf_picture, cpi->tpl_bsize);
7047 #endif  // DUMP_TPL_STATS
7048 #endif  // CONFIG_NON_GREEDY_MV
7049 }
7050
// Selects the next frame to code (an ARF source peeked from the lookahead, or
// the next popped source frame) and runs the encode path that matches the
// configured pass / SVC mode.
//
// frame_flags: out; set to FRAMEFLAGS_KEY when the chosen source has
//              VPX_EFLAG_FORCE_KF set, otherwise 0, then handed to
//              Pass0Encode / Pass2Encode / SvcEncode.
// size:        out; zeroed before encoding and on the no-source path, then
//              handed to Pass0Encode / Pass2Encode / SvcEncode.
// dest:        handed to Pass0Encode / Pass2Encode / SvcEncode unchanged.
// time_stamp / time_end: out; copied from the source's ts_start / ts_end.
// flush:       forwarded to the lookahead pop; forced to 1 here when a
//              forced keyframe is found while scanning for the ARF source.
//
// Returns 0 once the encode path has run. Returns -1 when the previous source
// cannot be peeked, when no source frame is available, or when no free frame
// buffer is found.
int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
                            size_t *size, uint8_t *dest, int64_t *time_stamp,
                            int64_t *time_end, int flush) {
  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
  VP9_COMMON *const cm = &cpi->common;
  BufferPool *const pool = cm->buffer_pool;
  RATE_CONTROL *const rc = &cpi->rc;
  struct vpx_usec_timer cmptimer;
  YV12_BUFFER_CONFIG *force_src_buffer = NULL;
  struct lookahead_entry *last_source = NULL;
  struct lookahead_entry *source = NULL;
  int arf_src_index;
  // Snapshot of the GF group index taken on entry; the checks below use this
  // value rather than re-reading cpi->twopass.gf_group.index.
  const int gf_group_index = cpi->twopass.gf_group.index;
  int i;

  if (is_one_pass_cbr_svc(cpi)) {
    vp9_one_pass_cbr_svc_start_layer(cpi);
  }

  vpx_usec_timer_start(&cmptimer);

  vp9_set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV);

  // Is multi-arf enabled.
  // Note that at the moment multi_arf is only configured for 2 pass VBR and
  // will not work properly with svc.
  // Enable the Jingning's new "multi_layer_arf" code if "enable_auto_arf"
  // is greater than or equal to 2.
  if ((oxcf->pass == 2) && !cpi->use_svc && (cpi->oxcf.enable_auto_arf >= 2))
    cpi->multi_layer_arf = 1;
  else
    cpi->multi_layer_arf = 0;

  // Normal defaults
  cm->reset_frame_context = 0;
  cm->refresh_frame_context = 1;
  if (!is_one_pass_cbr_svc(cpi)) {
    cpi->refresh_last_frame = 1;
    cpi->refresh_golden_frame = 0;
    cpi->refresh_alt_ref_frame = 0;
  }

  // Should we encode an arf frame.
  arf_src_index = get_arf_src_index(cpi);

  // Walk the lookahead entries up to and including the ARF source position.
  if (arf_src_index) {
    for (i = 0; i <= arf_src_index; ++i) {
      struct lookahead_entry *e = vp9_lookahead_peek(cpi->lookahead, i);
      // Avoid creating an alt-ref if there's a forced keyframe pending.
      if (e == NULL) {
        break;
      } else if (e->flags == VPX_EFLAG_FORCE_KF) {
        arf_src_index = 0;
        flush = 1;
        break;
      }
    }
  }

  // Clear arf index stack before group of pictures processing starts.
  if (gf_group_index == 1) {
    stack_init(cpi->twopass.gf_group.arf_index_stack, MAX_LAG_BUFFERS * 2);
    cpi->twopass.gf_group.stack_size = 0;
  }

  if (arf_src_index) {
    assert(arf_src_index <= rc->frames_to_key);
    if ((source = vp9_lookahead_peek(cpi->lookahead, arf_src_index)) != NULL) {
      cpi->alt_ref_source = source;

#if !CONFIG_REALTIME_ONLY
      if ((oxcf->mode != REALTIME) && (oxcf->arnr_max_frames > 0) &&
          (oxcf->arnr_strength > 0)) {
        int bitrate = cpi->rc.avg_frame_bandwidth / 40;
        int not_low_bitrate = bitrate > ALT_REF_AQ_LOW_BITRATE_BOUNDARY;

        int not_last_frame = (cpi->lookahead->sz - arf_src_index > 1);
        not_last_frame |= ALT_REF_AQ_APPLY_TO_LAST_FRAME;

        // Produce the filtered ARF frame.
        vp9_temporal_filter(cpi, arf_src_index);
        vpx_extend_frame_borders(&cpi->alt_ref_buffer);

        // for small bitrates segmentation overhead usually
        // eats all bitrate gain from enabling delta quantizers
        if (cpi->oxcf.alt_ref_aq != 0 && not_low_bitrate && not_last_frame)
          vp9_alt_ref_aq_setup_mode(cpi->alt_ref_aq, cpi);

        // Encode from the filtered buffer instead of the lookahead image.
        force_src_buffer = &cpi->alt_ref_buffer;
      }
#endif
      // The ARF is coded as a non-shown frame that refreshes only the
      // alt-ref slot.
      cm->show_frame = 0;
      cm->intra_only = 0;
      cpi->refresh_alt_ref_frame = 1;
      cpi->refresh_golden_frame = 0;
      cpi->refresh_last_frame = 0;
      rc->is_src_frame_alt_ref = 0;
      rc->source_alt_ref_pending = 0;
    } else {
      rc->source_alt_ref_pending = 0;
    }
  }

  // No ARF source was selected above: take the next frame from the lookahead.
  if (!source) {
    // Get last frame source.
    if (cm->current_video_frame > 0) {
      if ((last_source = vp9_lookahead_peek(cpi->lookahead, -1)) == NULL)
        return -1;
    }

    // Read in the source frame.
    if (cpi->use_svc || cpi->svc.set_intra_only_frame)
      source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush);
    else
      source = vp9_lookahead_pop(cpi->lookahead, flush);

    if (source != NULL) {
      cm->show_frame = 1;
      cm->intra_only = 0;
      // If the flags indicate intra frame, but if the current picture is for
      // spatial layer above first_spatial_layer_to_encode, it should not be an
      // intra picture.
      if ((source->flags & VPX_EFLAG_FORCE_KF) && cpi->use_svc &&
          cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode) {
        source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF);
      }

      // Check to see if the frame should be encoded as an arf overlay.
      check_src_altref(cpi, source);
    }
  }

  if (source) {
    // Use the filtered ARF buffer when one was produced, otherwise the image
    // stored in the lookahead entry.
    cpi->un_scaled_source = cpi->Source =
        force_src_buffer ? force_src_buffer : &source->img;

#ifdef ENABLE_KF_DENOISE
    // Copy of raw source for metrics calculation.
    if (is_psnr_calc_enabled(cpi))
      vp9_copy_and_extend_frame(cpi->Source, &cpi->raw_unscaled_source);
#endif

    cpi->unscaled_last_source = last_source != NULL ? &last_source->img : NULL;

    *time_stamp = source->ts_start;
    *time_end = source->ts_end;
    *frame_flags = (source->flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
  } else {
    // Nothing to encode: report an empty frame and, when flushing the first
    // pass, finish it exactly once.
    *size = 0;
#if !CONFIG_REALTIME_ONLY
    if (flush && oxcf->pass == 1 && !cpi->twopass.first_pass_done) {
      vp9_end_first_pass(cpi); /* get last stats packet */
      cpi->twopass.first_pass_done = 1;
    }
#endif  // !CONFIG_REALTIME_ONLY
    return -1;
  }

  // A source starting earlier than anything seen so far resets both the first
  // and the last-seen timestamps to its start time.
  if (source->ts_start < cpi->first_time_stamp_ever) {
    cpi->first_time_stamp_ever = source->ts_start;
    cpi->last_end_time_stamp_seen = source->ts_start;
  }

  // Clear down mmx registers
  vpx_clear_system_state();

  // adjust frame rates based on timestamps given
  if (cm->show_frame) {
    if (cpi->use_svc && cpi->svc.use_set_ref_frame_config &&
        cpi->svc.duration[cpi->svc.spatial_layer_id] > 0)
      vp9_svc_adjust_frame_rate(cpi);
    else
      adjust_frame_rate(cpi, source);
  }

  if (is_one_pass_cbr_svc(cpi)) {
    vp9_update_temporal_layer_framerate(cpi);
    vp9_restore_layer_context(cpi);
  }

  // Find a free buffer for the new frame, releasing the reference previously
  // held.
  if (cm->new_fb_idx != INVALID_IDX) {
    --pool->frame_bufs[cm->new_fb_idx].ref_count;
  }
  cm->new_fb_idx = get_free_fb(cm);

  if (cm->new_fb_idx == INVALID_IDX) return -1;

  cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];

  // Start with a 0 size frame.
  *size = 0;

  cpi->frame_flags = *frame_flags;

#if !CONFIG_REALTIME_ONLY
  if ((oxcf->pass == 2) && !cpi->use_svc) {
    vp9_rc_get_second_pass_params(cpi);
  } else if (oxcf->pass == 1) {
    set_frame_size(cpi);
  }
#endif  // !CONFIG_REALTIME_ONLY

  if (oxcf->pass != 1 && cpi->level_constraint.level_index >= 0 &&
      cpi->level_constraint.fail_flag == 0)
    level_rc_framerate(cpi, arf_src_index);

  // Invalidate all scaled reference indices for multi-pass, SVC and
  // intra-only frames.
  if (cpi->oxcf.pass != 0 || cpi->use_svc || frame_is_intra_only(cm) == 1) {
    for (i = 0; i < REFS_PER_FRAME; ++i) cpi->scaled_ref_idx[i] = INVALID_IDX;
  }

  // Allocate the k-means data array the first time through; the
  // kmeans_data_arr_alloc flag prevents a second allocation.
  if (cpi->kmeans_data_arr_alloc == 0) {
    const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
    const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
#if CONFIG_MULTITHREAD
    pthread_mutex_init(&cpi->kmeans_mutex, NULL);
#endif
    CHECK_MEM_ERROR(
        cm, cpi->kmeans_data_arr,
        vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->kmeans_data_arr)));
    cpi->kmeans_data_stride = mi_cols;
    cpi->kmeans_data_arr_alloc = 1;
  }

#if CONFIG_NON_GREEDY_MV
  {
    // This runs on every call.
    // NOTE(review): presumably vp9_alloc_motion_field_info() is a no-op when
    // the existing allocation already fits - confirm against its definition.
    const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
    const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
    Status status = vp9_alloc_motion_field_info(
        &cpi->motion_field_info, MAX_ARF_GOP_SIZE, mi_rows, mi_cols);
    if (status == STATUS_FAILED) {
      vpx_internal_error(&(cm)->error, VPX_CODEC_MEM_ERROR,
                         "vp9_alloc_motion_field_info failed");
    }
  }
#endif  // CONFIG_NON_GREEDY_MV

  // Build the TPL model only when the GF group index on entry was 1, that
  // entry is an ARF update, and the enable_tpl_model speed feature is set.
  if (gf_group_index == 1 &&
      cpi->twopass.gf_group.update_type[gf_group_index] == ARF_UPDATE &&
      cpi->sf.enable_tpl_model) {
    init_tpl_buffer(cpi);
    vp9_estimate_qp_gop(cpi);
    setup_tpl_stats(cpi);
  }

#if CONFIG_BITSTREAM_DEBUG
  assert(cpi->oxcf.max_threads == 0 &&
         "bitstream debug tool does not support multithreading");
  bitstream_queue_record_write();
#endif
#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
  bitstream_queue_set_frame_write(cm->current_video_frame * 2 + cm->show_frame);
#endif

  cpi->td.mb.fp_src_pred = 0;
  // Dispatch to the encode routine for the configured pass / SVC mode.
#if CONFIG_REALTIME_ONLY
  if (cpi->use_svc) {
    SvcEncode(cpi, size, dest, frame_flags);
  } else {
    // One pass encode
    Pass0Encode(cpi, size, dest, frame_flags);
  }
#else  // !CONFIG_REALTIME_ONLY
  if (oxcf->pass == 1 && !cpi->use_svc) {
    // First pass: pick the 4x4 forward/inverse transforms (WHT when lossless
    // is requested, DCT otherwise) before collecting first-pass stats.
    const int lossless = is_lossless_requested(oxcf);
#if CONFIG_VP9_HIGHBITDEPTH
    if (cpi->oxcf.use_highbitdepth)
      cpi->td.mb.fwd_txfm4x4 =
          lossless ? vp9_highbd_fwht4x4 : vpx_highbd_fdct4x4;
    else
      cpi->td.mb.fwd_txfm4x4 = lossless ? vp9_fwht4x4 : vpx_fdct4x4;
    cpi->td.mb.highbd_inv_txfm_add =
        lossless ? vp9_highbd_iwht4x4_add : vp9_highbd_idct4x4_add;
#else
    cpi->td.mb.fwd_txfm4x4 = lossless ? vp9_fwht4x4 : vpx_fdct4x4;
#endif  // CONFIG_VP9_HIGHBITDEPTH
    cpi->td.mb.inv_txfm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
    vp9_first_pass(cpi, source);
  } else if (oxcf->pass == 2 && !cpi->use_svc) {
    Pass2Encode(cpi, size, dest, frame_flags);
  } else if (cpi->use_svc) {
    SvcEncode(cpi, size, dest, frame_flags);
  } else {
    // One pass encode
    Pass0Encode(cpi, size, dest, frame_flags);
  }
#endif  // CONFIG_REALTIME_ONLY

  if (cm->show_frame) cm->cur_show_frame_fb_idx = cm->new_fb_idx;

  // Store the working frame context back into the selected context slot.
  if (cm->refresh_frame_context)
    cm->frame_contexts[cm->frame_context_idx] = *cm->fc;

  // No frame encoded, or frame was dropped, release scaled references.
  if ((*size == 0) && (frame_is_intra_only(cm) == 0)) {
    release_scaled_references(cpi);
  }

  if (*size > 0) {
    cpi->droppable = !frame_is_reference(cpi);
  }

  // Save layer specific state.
  if (is_one_pass_cbr_svc(cpi) || ((cpi->svc.number_temporal_layers > 1 ||
                                    cpi->svc.number_spatial_layers > 1) &&
                                   oxcf->pass == 2)) {
    vp9_save_layer_context(cpi);
  }

  // Accumulate the time spent since vpx_usec_timer_start() above.
  vpx_usec_timer_mark(&cmptimer);
  cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);

  // Should we calculate metrics for the frame.
  if (is_psnr_calc_enabled(cpi)) generate_psnr_packet(cpi);

  if (cpi->keep_level_stats && oxcf->pass != 1)
    update_level_info(cpi, size, arf_src_index);

#if CONFIG_INTERNAL_STATS

  // Internal quality statistics; skipped for the first pass and for
  // non-shown frames.
  if (oxcf->pass != 1) {
    double samples = 0.0;
    cpi->bytes += (int)(*size);

    if (cm->show_frame) {
      uint32_t bit_depth = 8;
      uint32_t in_bit_depth = 8;
      cpi->count++;
#if CONFIG_VP9_HIGHBITDEPTH
      if (cm->use_highbitdepth) {
        in_bit_depth = cpi->oxcf.input_bit_depth;
        bit_depth = cm->bit_depth;
      }
#endif

      if (cpi->b_calculate_psnr) {
        YV12_BUFFER_CONFIG *orig = cpi->raw_source_frame;
        YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show;
        YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer;
        PSNR_STATS psnr;
        // PSNR of the reconstruction against the raw source.
#if CONFIG_VP9_HIGHBITDEPTH
        vpx_calc_highbd_psnr(orig, recon, &psnr, cpi->td.mb.e_mbd.bd,
                             in_bit_depth);
#else
        vpx_calc_psnr(orig, recon, &psnr);
#endif  // CONFIG_VP9_HIGHBITDEPTH

        adjust_image_stat(psnr.psnr[1], psnr.psnr[2], psnr.psnr[3],
                          psnr.psnr[0], &cpi->psnr);
        cpi->total_sq_error += psnr.sse[0];
        cpi->total_samples += psnr.samples[0];
        samples = psnr.samples[0];

        {
          // Same metrics again, this time on the post-processing buffer pp.
          PSNR_STATS psnr2;
          double frame_ssim2 = 0, weight = 0;
#if CONFIG_VP9_POSTPROC
          if (vpx_alloc_frame_buffer(
                  pp, recon->y_crop_width, recon->y_crop_height,
                  cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                  cm->use_highbitdepth,
#endif
                  VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment) < 0) {
            vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                               "Failed to allocate post processing buffer");
          }
          {
            vp9_ppflags_t ppflags;
            ppflags.post_proc_flag = VP9D_DEBLOCK;
            ppflags.deblocking_level = 0;  // not used in vp9_post_proc_frame()
            ppflags.noise_level = 0;       // not used in vp9_post_proc_frame()
            vp9_post_proc_frame(cm, pp, &ppflags,
                                cpi->un_scaled_source->y_width);
          }
#endif
          vpx_clear_system_state();

#if CONFIG_VP9_HIGHBITDEPTH
          vpx_calc_highbd_psnr(orig, pp, &psnr2, cpi->td.mb.e_mbd.bd,
                               cpi->oxcf.input_bit_depth);
#else
          vpx_calc_psnr(orig, pp, &psnr2);
#endif  // CONFIG_VP9_HIGHBITDEPTH

          cpi->totalp_sq_error += psnr2.sse[0];
          cpi->totalp_samples += psnr2.samples[0];
          adjust_image_stat(psnr2.psnr[1], psnr2.psnr[2], psnr2.psnr[3],
                            psnr2.psnr[0], &cpi->psnrp);

          // SSIM of the reconstruction against the raw source.
#if CONFIG_VP9_HIGHBITDEPTH
          if (cm->use_highbitdepth) {
            frame_ssim2 = vpx_highbd_calc_ssim(orig, recon, &weight, bit_depth,
                                               in_bit_depth);
          } else {
            frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
          }
#else
          frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
#endif  // CONFIG_VP9_HIGHBITDEPTH

          cpi->worst_ssim = VPXMIN(cpi->worst_ssim, frame_ssim2);
          cpi->summed_quality += frame_ssim2 * weight;
          cpi->summed_weights += weight;

          // SSIM of the post-processing buffer against the raw source.
#if CONFIG_VP9_HIGHBITDEPTH
          if (cm->use_highbitdepth) {
            frame_ssim2 = vpx_highbd_calc_ssim(orig, pp, &weight, bit_depth,
                                               in_bit_depth);
          } else {
            frame_ssim2 = vpx_calc_ssim(orig, pp, &weight);
          }
#else
          frame_ssim2 = vpx_calc_ssim(orig, pp, &weight);
#endif  // CONFIG_VP9_HIGHBITDEPTH

          cpi->summedp_quality += frame_ssim2 * weight;
          cpi->summedp_weights += weight;
          // Compiled-out debug dump of per-frame PSNR/SSIM to "q_used.stt".
#if 0
          if (cm->show_frame) {
            FILE *f = fopen("q_used.stt", "a");
            fprintf(f, "%5d : Y%f7.3:U%f7.3:V%f7.3:F%f7.3:S%7.3f\n",
                    cpi->common.current_video_frame, psnr2.psnr[1],
                    psnr2.psnr[2], psnr2.psnr[3], psnr2.psnr[0], frame_ssim2);
            fclose(f);
          }
#endif
        }
      }
      // Blockiness is only computed for non-high-bitdepth content.
      if (cpi->b_calculate_blockiness) {
#if CONFIG_VP9_HIGHBITDEPTH
        if (!cm->use_highbitdepth)
#endif
        {
          double frame_blockiness = vp9_get_blockiness(
              cpi->Source->y_buffer, cpi->Source->y_stride,
              cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
              cpi->Source->y_width, cpi->Source->y_height);
          cpi->worst_blockiness =
              VPXMAX(cpi->worst_blockiness, frame_blockiness);
          cpi->total_blockiness += frame_blockiness;
        }
      }

      // Consistency is only computed for non-high-bitdepth content.
      if (cpi->b_calculate_consistency) {
#if CONFIG_VP9_HIGHBITDEPTH
        if (!cm->use_highbitdepth)
#endif
        {
          double this_inconsistency = vpx_get_ssim_metrics(
              cpi->Source->y_buffer, cpi->Source->y_stride,
              cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
              cpi->Source->y_width, cpi->Source->y_height, cpi->ssim_vars,
              &cpi->metrics, 1);

          // NOTE(review): `samples` is still 0.0 here when b_calculate_psnr
          // is off, and `consistency` is derived from total_inconsistency
          // before this frame's value is added below - confirm both are
          // intended.
          const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
          double consistency =
              vpx_sse_to_psnr(samples, peak, (double)cpi->total_inconsistency);
          if (consistency > 0.0)
            cpi->worst_consistency =
                VPXMIN(cpi->worst_consistency, consistency);
          cpi->total_inconsistency += this_inconsistency;
        }
      }

      // FastSSIM and PSNR-HVS run for every shown frame, independent of the
      // b_calculate_* switches.
      {
        double y, u, v, frame_all;
        frame_all = vpx_calc_fastssim(cpi->Source, cm->frame_to_show, &y, &u,
                                      &v, bit_depth, in_bit_depth);
        adjust_image_stat(y, u, v, frame_all, &cpi->fastssim);
      }
      {
        double y, u, v, frame_all;
        frame_all = vpx_psnrhvs(cpi->Source, cm->frame_to_show, &y, &u, &v,
                                bit_depth, in_bit_depth);
        adjust_image_stat(y, u, v, frame_all, &cpi->psnrhvs);
      }
    }
  }

#endif

  // After a shown frame, advance to the next spatial layer and wrap back to
  // layer 0 past the last one.
  if (is_one_pass_cbr_svc(cpi)) {
    if (cm->show_frame) {
      ++cpi->svc.spatial_layer_to_encode;
      if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers)
        cpi->svc.spatial_layer_to_encode = 0;
    }
  }

  vpx_clear_system_state();
  return 0;
}
7545
7546 int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest,
7547                               vp9_ppflags_t *flags) {
7548   VP9_COMMON *cm = &cpi->common;
7549 #if !CONFIG_VP9_POSTPROC
7550   (void)flags;
7551 #endif
7552
7553   if (!cm->show_frame) {
7554     return -1;
7555   } else {
7556     int ret;
7557 #if CONFIG_VP9_POSTPROC
7558     ret = vp9_post_proc_frame(cm, dest, flags, cpi->un_scaled_source->y_width);
7559 #else
7560     if (cm->frame_to_show) {
7561       *dest = *cm->frame_to_show;
7562       dest->y_width = cm->width;
7563       dest->y_height = cm->height;
7564       dest->uv_width = cm->width >> cm->subsampling_x;
7565       dest->uv_height = cm->height >> cm->subsampling_y;
7566       ret = 0;
7567     } else {
7568       ret = -1;
7569     }
7570 #endif  // !CONFIG_VP9_POSTPROC
7571     vpx_clear_system_state();
7572     return ret;
7573   }
7574 }
7575
7576 int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING horiz_mode,
7577                           VPX_SCALING vert_mode) {
7578   VP9_COMMON *cm = &cpi->common;
7579   int hr = 0, hs = 0, vr = 0, vs = 0;
7580
7581   if (horiz_mode > ONETWO || vert_mode > ONETWO) return -1;
7582
7583   Scale2Ratio(horiz_mode, &hr, &hs);
7584   Scale2Ratio(vert_mode, &vr, &vs);
7585
7586   // always go to the next whole number
7587   cm->width = (hs - 1 + cpi->oxcf.width * hr) / hs;
7588   cm->height = (vs - 1 + cpi->oxcf.height * vr) / vs;
7589   if (cm->current_video_frame) {
7590     assert(cm->width <= cpi->initial_width);
7591     assert(cm->height <= cpi->initial_height);
7592   }
7593
7594   update_frame_size(cpi);
7595
7596   return 0;
7597 }
7598
7599 int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width,
7600                          unsigned int height) {
7601   VP9_COMMON *cm = &cpi->common;
7602 #if CONFIG_VP9_HIGHBITDEPTH
7603   check_initial_width(cpi, cm->use_highbitdepth, 1, 1);
7604 #else
7605   check_initial_width(cpi, 1, 1);
7606 #endif  // CONFIG_VP9_HIGHBITDEPTH
7607
7608 #if CONFIG_VP9_TEMPORAL_DENOISING
7609   setup_denoiser_buffer(cpi);
7610 #endif
7611
7612   if (width) {
7613     cm->width = width;
7614     if (cm->width > cpi->initial_width) {
7615       cm->width = cpi->initial_width;
7616       printf("Warning: Desired width too large, changed to %d\n", cm->width);
7617     }
7618   }
7619
7620   if (height) {
7621     cm->height = height;
7622     if (cm->height > cpi->initial_height) {
7623       cm->height = cpi->initial_height;
7624       printf("Warning: Desired height too large, changed to %d\n", cm->height);
7625     }
7626   }
7627   assert(cm->width <= cpi->initial_width);
7628   assert(cm->height <= cpi->initial_height);
7629
7630   update_frame_size(cpi);
7631
7632   return 0;
7633 }
7634
7635 void vp9_set_svc(VP9_COMP *cpi, int use_svc) {
7636   cpi->use_svc = use_svc;
7637   return;
7638 }
7639
7640 int vp9_get_quantizer(VP9_COMP *cpi) { return cpi->common.base_qindex; }
7641
7642 void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags) {
7643   if (flags &
7644       (VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF)) {
7645     int ref = 7;
7646
7647     if (flags & VP8_EFLAG_NO_REF_LAST) ref ^= VP9_LAST_FLAG;
7648
7649     if (flags & VP8_EFLAG_NO_REF_GF) ref ^= VP9_GOLD_FLAG;
7650
7651     if (flags & VP8_EFLAG_NO_REF_ARF) ref ^= VP9_ALT_FLAG;
7652
7653     vp9_use_as_reference(cpi, ref);
7654   }
7655
7656   if (flags &
7657       (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
7658        VP8_EFLAG_FORCE_GF | VP8_EFLAG_FORCE_ARF)) {
7659     int upd = 7;
7660
7661     if (flags & VP8_EFLAG_NO_UPD_LAST) upd ^= VP9_LAST_FLAG;
7662
7663     if (flags & VP8_EFLAG_NO_UPD_GF) upd ^= VP9_GOLD_FLAG;
7664
7665     if (flags & VP8_EFLAG_NO_UPD_ARF) upd ^= VP9_ALT_FLAG;
7666
7667     vp9_update_reference(cpi, upd);
7668   }
7669
7670   if (flags & VP8_EFLAG_NO_UPD_ENTROPY) {
7671     vp9_update_entropy(cpi, 0);
7672   }
7673 }
7674
7675 void vp9_set_row_mt(VP9_COMP *cpi) {
7676   // Enable row based multi-threading for supported modes of encoding
7677   cpi->row_mt = 0;
7678   if (((cpi->oxcf.mode == GOOD || cpi->oxcf.mode == BEST) &&
7679        cpi->oxcf.speed < 5 && cpi->oxcf.pass == 1) &&
7680       cpi->oxcf.row_mt && !cpi->use_svc)
7681     cpi->row_mt = 1;
7682
7683   if (cpi->oxcf.mode == GOOD && cpi->oxcf.speed < 5 &&
7684       (cpi->oxcf.pass == 0 || cpi->oxcf.pass == 2) && cpi->oxcf.row_mt &&
7685       !cpi->use_svc)
7686     cpi->row_mt = 1;
7687
7688   // In realtime mode, enable row based multi-threading for all the speed levels
7689   // where non-rd path is used.
7690   if (cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5 && cpi->oxcf.row_mt) {
7691     cpi->row_mt = 1;
7692   }
7693
7694   if (cpi->row_mt)
7695     cpi->row_mt_bit_exact = 1;
7696   else
7697     cpi->row_mt_bit_exact = 0;
7698 }