Vidyo patch: Rate control for SVC, 1 pass CBR mode.

[libvpx] / vp9 / encoder / vp9_encodeframe.c
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c

index 4d1964d63397e751a427424d1645fa3acb94614e..4355ec60e1adad7bc153a679d73e2e644c115895 100644 (file)
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -13,8 +13,10 @@
  #include <stdio.h>
  
  #include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
  #include "./vpx_config.h"
  
+#include "vpx_ports/mem.h"
  #include "vpx_ports/vpx_timer.h"
  
  #include "vp9/common/vp9_common.h"
@@ -360,7 +362,7 @@ static void get_variance(var *v) {
        ((v->sum_error * v->sum_error) >> v->log2_count)) >> v->log2_count);
  }
  
-void sum_2_variances(const var *a, const var *b, var *r) {
+static void sum_2_variances(const var *a, const var *b, var *r) {
    assert(a->log2_count == b->log2_count);
    fill_variance(a->sum_square_error + b->sum_square_error,
                  a->sum_error + b->sum_error, a->log2_count + 1, r);
@@ -462,45 +464,55 @@ static int set_vt_partitioning(VP9_COMP *cpi,
    return 0;
  }
  
-void vp9_set_vbp_thresholds(VP9_COMP *cpi, int q) {
+// Set the variance split thresholds for the following block sizes:
+// 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16,
+// 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partitions) is
+// currently only used on key frames.
+static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) {
+  VP9_COMMON *const cm = &cpi->common;
+  const int is_key_frame = (cm->frame_type == KEY_FRAME);
+  const int threshold_multiplier = is_key_frame ? 20 : 1;
+  const int64_t threshold_base = (int64_t)(threshold_multiplier *
+      cpi->y_dequant[q][1]);
+  if (is_key_frame) {
+    thresholds[0] = threshold_base;
+    thresholds[1] = threshold_base >> 2;
+    thresholds[2] = threshold_base >> 2;
+    thresholds[3] = threshold_base << 2;
+  } else {
+    thresholds[1] = threshold_base;
+    if (cm->width <= 352 && cm->height <= 288) {
+      thresholds[0] = threshold_base >> 2;
+      thresholds[2] = threshold_base << 3;
+    } else {
+      thresholds[0] = threshold_base;
+      thresholds[1] = (5 * threshold_base) >> 2;
+      if (cm->width >= 1920 && cm->height >= 1080)
+        thresholds[1] = (7 * threshold_base) >> 2;
+      thresholds[2] = threshold_base << cpi->oxcf.speed;
+    }
+  }
+}
+
+void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q) {
+  VP9_COMMON *const cm = &cpi->common;
    SPEED_FEATURES *const sf = &cpi->sf;
+  const int is_key_frame = (cm->frame_type == KEY_FRAME);
    if (sf->partition_search_type != VAR_BASED_PARTITION &&
        sf->partition_search_type != REFERENCE_PARTITION) {
      return;
    } else {
-    VP9_COMMON *const cm = &cpi->common;
-    const int is_key_frame = (cm->frame_type == KEY_FRAME);
-    const int threshold_multiplier = is_key_frame ? 20 : 1;
-    const int64_t threshold_base = (int64_t)(threshold_multiplier *
-        cpi->y_dequant[q][1]);
-
-    // TODO(marpan): Allow 4x4 partitions for inter-frames.
-    // use_4x4_partition = (variance4x4downsample[i2 + j] == 1);
-    // If 4x4 partition is not used, then 8x8 partition will be selected
-    // if variance of 16x16 block is very high, so use larger threshold
-    // for 16x16 (threshold_bsize_min) in that case.
-
-    // Array index: 0 - threshold_64x64; 1 - threshold_32x32;
-    // 2 - threshold_16x16; 3 - vbp_threshold_8x8;
+    set_vbp_thresholds(cpi, cpi->vbp_thresholds, q);
+    // The thresholds below are not changed locally.
      if (is_key_frame) {
-      cpi->vbp_thresholds[0] = threshold_base;
-      cpi->vbp_thresholds[1] = threshold_base >> 2;
-      cpi->vbp_thresholds[2] = threshold_base >> 2;
-      cpi->vbp_thresholds[3] = threshold_base << 2;
        cpi->vbp_threshold_sad = 0;
        cpi->vbp_bsize_min = BLOCK_8X8;
      } else {
-      cpi->vbp_thresholds[1] = threshold_base;
-      if (cm->width <= 352 && cm->height <= 288) {
-        cpi->vbp_thresholds[0] = threshold_base >> 2;
-        cpi->vbp_thresholds[2] = threshold_base << 3;
+      if (cm->width <= 352 && cm->height <= 288)
          cpi->vbp_threshold_sad = 100;
-      } else {
-        cpi->vbp_thresholds[0] = threshold_base;
-        cpi->vbp_thresholds[1] = (5 * threshold_base) >> 2;
-        cpi->vbp_thresholds[2] = threshold_base << cpi->oxcf.speed;
-        cpi->vbp_threshold_sad = 1000;
-      }
+      else
+        cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000 ?
+            (cpi->y_dequant[q][1] << 1) : 1000;
        cpi->vbp_bsize_min = BLOCK_16X16;
      }
      cpi->vbp_threshold_minmax = 15 + (q >> 3);
@@ -549,23 +561,6 @@ static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
    return (minmax_max - minmax_min);
  }
  
-static void modify_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) {
-  VP9_COMMON *const cm = &cpi->common;
-  const int64_t threshold_base = (int64_t)(cpi->y_dequant[q][1]);
-
-  // Array index: 0 - threshold_64x64; 1 - threshold_32x32;
-  // 2 - threshold_16x16; 3 - vbp_threshold_8x8;
-  thresholds[1] = threshold_base;
-  if (cm->width <= 352 && cm->height <= 288) {
-    thresholds[0] = threshold_base >> 2;
-    thresholds[2] = threshold_base << 3;
-  } else {
-    thresholds[0] = threshold_base;
-    thresholds[1] = (5 * threshold_base) >> 2;
-    thresholds[2] = threshold_base << cpi->oxcf.speed;
-  }
-}
-
  static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d,
                                   int dp, int x8_idx, int y8_idx, v8x8 *vst,
  #if CONFIG_VP9_HIGHBITDEPTH
@@ -678,7 +673,7 @@ static int choose_partitioning(VP9_COMP *cpi,
  
      if (cyclic_refresh_segment_id_boosted(segment_id)) {
        int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
-      modify_vbp_thresholds(cpi, thresholds, q);
+      set_vbp_thresholds(cpi, thresholds, q);
      }
    }
  
@@ -692,17 +687,28 @@ static int choose_partitioning(VP9_COMP *cpi,
    s = x->plane[0].src.buf;
    sp = x->plane[0].src.stride;
  
-  if (!is_key_frame) {
+  if (!is_key_frame && !(is_one_pass_cbr_svc(cpi) &&
+      cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
+    // In the case of spatial/temporal scalable coding, the assumption here is
+    // that the temporal reference frame will always be of type LAST_FRAME.
+    // TODO(marpan): If that assumption is broken, we need to revisit this code.
      MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
      unsigned int uv_sad;
      const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
  
-    const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
+    const YV12_BUFFER_CONFIG *yv12_g = NULL;
      unsigned int y_sad, y_sad_g;
      const BLOCK_SIZE bsize = BLOCK_32X32
          + (mi_col + 4 < cm->mi_cols) * 2 + (mi_row + 4 < cm->mi_rows);
  
      assert(yv12 != NULL);
+
+    if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id)) {
+      // For now, GOLDEN will not be used for non-zero spatial layers, since
+      // it may not be a temporal reference.
+      yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
+    }
+
      if (yv12_g && yv12_g != yv12) {
        vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
                             &cm->frame_refs[GOLDEN_FRAME - 1].sf);
@@ -722,7 +728,7 @@ static int choose_partitioning(VP9_COMP *cpi,
      mbmi->mv[0].as_int = 0;
      mbmi->interp_filter = BILINEAR;
  
-    y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize);
+    y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col);
      if (y_sad_g < y_sad) {
        vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
                             &cm->frame_refs[GOLDEN_FRAME - 1].sf);
@@ -1036,8 +1042,8 @@ static void update_state(VP9_COMP *cpi, ThreadData *td,
    }
  
    x->skip = ctx->skip;
-  vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk,
-             sizeof(uint8_t) * ctx->num_4x4_blk);
+  memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk,
+         sizeof(uint8_t) * ctx->num_4x4_blk);
  
    if (!output_enabled)
      return;
@@ -1341,22 +1347,22 @@ static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col,
    int mi_width = num_8x8_blocks_wide_lookup[bsize];
    int mi_height = num_8x8_blocks_high_lookup[bsize];
    for (p = 0; p < MAX_MB_PLANE; p++) {
-    vpx_memcpy(
+    memcpy(
          xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x),
          a + num_4x4_blocks_wide * p,
          (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
          xd->plane[p].subsampling_x);
-    vpx_memcpy(
+    memcpy(
          xd->left_context[p]
              + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
          l + num_4x4_blocks_high * p,
          (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
          xd->plane[p].subsampling_y);
    }
-  vpx_memcpy(xd->above_seg_context + mi_col, sa,
-             sizeof(*xd->above_seg_context) * mi_width);
-  vpx_memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl,
-             sizeof(xd->left_seg_context[0]) * mi_height);
+  memcpy(xd->above_seg_context + mi_col, sa,
+         sizeof(*xd->above_seg_context) * mi_width);
+  memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl,
+         sizeof(xd->left_seg_context[0]) * mi_height);
  }
  
  static void save_context(MACROBLOCK *const x, int mi_row, int mi_col,
@@ -1373,22 +1379,22 @@ static void save_context(MACROBLOCK *const x, int mi_row, int mi_col,
  
    // buffer the above/left context information of the block in search.
    for (p = 0; p < MAX_MB_PLANE; ++p) {
-    vpx_memcpy(
+    memcpy(
          a + num_4x4_blocks_wide * p,
          xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x),
          (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
          xd->plane[p].subsampling_x);
-    vpx_memcpy(
+    memcpy(
          l + num_4x4_blocks_high * p,
          xd->left_context[p]
              + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
          (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
          xd->plane[p].subsampling_y);
    }
-  vpx_memcpy(sa, xd->above_seg_context + mi_col,
-             sizeof(*xd->above_seg_context) * mi_width);
-  vpx_memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK),
-             sizeof(xd->left_seg_context[0]) * mi_height);
+  memcpy(sa, xd->above_seg_context + mi_col,
+         sizeof(*xd->above_seg_context) * mi_width);
+  memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK),
+         sizeof(xd->left_seg_context[0]) * mi_height);
  }
  
  static void encode_b(VP9_COMP *cpi, const TileInfo *const tile,
@@ -1595,7 +1601,7 @@ static void set_source_var_based_partition(VP9_COMP *cpi,
      int use32x32 = 0;
      unsigned int thr = cpi->source_var_thresh;
  
-    vpx_memset(d32, 0, 4 * sizeof(diff));
+    memset(d32, 0, 4 * sizeof(diff));
  
      for (i = 0; i < 4; i++) {
        diff *d16[4];
@@ -2157,7 +2163,6 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
    int bh, bw;
    BLOCK_SIZE min_size = BLOCK_4X4;
    BLOCK_SIZE max_size = BLOCK_64X64;
-  int i = 0;
    int bs_hist[BLOCK_SIZES] = {0};
  
    // Trap case where we do not have a prediction.
@@ -2187,36 +2192,10 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
                                    bs_hist);
      }
  
-    // adjust observed min and max
+    // Adjust observed min and max for "relaxed" auto partition case.
      if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) {
        min_size = min_partition_size[min_size];
        max_size = max_partition_size[max_size];
-    } else if (cpi->sf.auto_min_max_partition_size ==
-               CONSTRAIN_NEIGHBORING_MIN_MAX) {
-      // adjust the search range based on the histogram of the observed
-      // partition sizes from left, above the previous co-located blocks
-      int sum = 0;
-      int first_moment = 0;
-      int second_moment = 0;
-      int var_unnormalized = 0;
-
-      for (i = 0; i < BLOCK_SIZES; i++) {
-        sum += bs_hist[i];
-        first_moment += bs_hist[i] * i;
-        second_moment += bs_hist[i] * i * i;
-      }
-
-      // if variance is small enough,
-      // adjust the range around its mean size, which gives a tighter range
-      var_unnormalized = second_moment - first_moment * first_moment / sum;
-      if (var_unnormalized <= 4 * sum) {
-        int mean = first_moment / sum;
-        min_size = min_partition_size[mean];
-        max_size = max_partition_size[mean];
-      } else {
-        min_size = min_partition_size[min_size];
-        max_size = max_partition_size[max_size];
-      }
      }
    }
  
@@ -2224,7 +2203,7 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
    max_size = find_partition_size(max_size,
                                   row8x8_remaining, col8x8_remaining,
                                   &bh, &bw);
-  min_size = MIN(min_size, max_size);
+  min_size = MIN(cpi->sf.rd_auto_partition_min_limit, MIN(min_size, max_size));
  
    // When use_square_partition_only is true, make sure at least one square
    // partition is allowed by selecting the next smaller square size as
@@ -2353,11 +2332,11 @@ static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd,
  }
  
  static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
-  vpx_memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv));
+  memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv));
  }
  
  static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
-  vpx_memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv));
+  memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv));
  }
  
  #if CONFIG_FP_MB_STATS
@@ -2832,8 +2811,8 @@ static void encode_rd_sb_row(VP9_COMP *cpi,
    int mi_col;
  
    // Initialize the left context for the new SB row
-  vpx_memset(&xd->left_context, 0, sizeof(xd->left_context));
-  vpx_memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
+  memset(&xd->left_context, 0, sizeof(xd->left_context));
+  memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
  
    // Code each SB in the row
    for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end;
@@ -2917,11 +2896,11 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
  
    // Note: this memset assumes above_context[0], [1] and [2]
    // are allocated as part of the same buffer.
-  vpx_memset(xd->above_context[0], 0,
-             sizeof(*xd->above_context[0]) *
-             2 * aligned_mi_cols * MAX_MB_PLANE);
-  vpx_memset(xd->above_seg_context, 0,
-             sizeof(*xd->above_seg_context) * aligned_mi_cols);
+  memset(xd->above_context[0], 0,
+         sizeof(*xd->above_context[0]) *
+         2 * aligned_mi_cols * MAX_MB_PLANE);
+  memset(xd->above_seg_context, 0,
+         sizeof(*xd->above_seg_context) * aligned_mi_cols);
  }
  
  static int check_dual_ref_flags(VP9_COMP *cpi) {
@@ -3594,8 +3573,8 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi,
    int mi_col;
  
    // Initialize the left context for the new SB row
-  vpx_memset(&xd->left_context, 0, sizeof(xd->left_context));
-  vpx_memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
+  memset(&xd->left_context, 0, sizeof(xd->left_context));
+  memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
  
    // Code each SB in the row
    for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end;
@@ -3683,13 +3662,13 @@ static int set_var_thresh_from_histogram(VP9_COMP *cpi) {
    const int cutoff = (MIN(cm->width, cm->height) >= 720) ?
        (cm->MBs * VAR_HIST_LARGE_CUT_OFF / 100) :
        (cm->MBs * VAR_HIST_SMALL_CUT_OFF / 100);
-  DECLARE_ALIGNED_ARRAY(16, int, hist, VAR_HIST_BINS);
+  DECLARE_ALIGNED(16, int, hist[VAR_HIST_BINS]);
    diff *var16 = cpi->source_diff_var;
  
    int sum = 0;
    int i, j;
  
-  vpx_memset(hist, 0, VAR_HIST_BINS * sizeof(hist[0]));
+  memset(hist, 0, VAR_HIST_BINS * sizeof(hist[0]));
  
    for (i = 0; i < cm->mb_rows; i++) {
      for (j = 0; j < cm->mb_cols; j++) {
@@ -3697,15 +3676,15 @@ static int set_var_thresh_from_histogram(VP9_COMP *cpi) {
        if (cm->use_highbitdepth) {
          switch (cm->bit_depth) {
            case VPX_BITS_8:
-            vp9_highbd_get16x16var(src, src_stride, last_src, last_stride,
+            vpx_highbd_8_get16x16var(src, src_stride, last_src, last_stride,
                                     &var16->sse, &var16->sum);
              break;
            case VPX_BITS_10:
-            vp9_highbd_10_get16x16var(src, src_stride, last_src, last_stride,
+            vpx_highbd_10_get16x16var(src, src_stride, last_src, last_stride,
                                      &var16->sse, &var16->sum);
              break;
            case VPX_BITS_12:
-            vp9_highbd_12_get16x16var(src, src_stride, last_src, last_stride,
+            vpx_highbd_12_get16x16var(src, src_stride, last_src, last_stride,
                                        &var16->sse, &var16->sum);
              break;
            default:
@@ -3714,11 +3693,11 @@ static int set_var_thresh_from_histogram(VP9_COMP *cpi) {
              return -1;
          }
        } else {
-        vp9_get16x16var(src, src_stride, last_src, last_stride,
+        vpx_get16x16var(src, src_stride, last_src, last_stride,
                          &var16->sse, &var16->sum);
        }
  #else
-      vp9_get16x16var(src, src_stride, last_src, last_stride,
+      vpx_get16x16var(src, src_stride, last_src, last_stride,
                        &var16->sse, &var16->sum);
  #endif  // CONFIG_VP9_HIGHBITDEPTH
        var16->var = var16->sse -
@@ -4187,7 +4166,7 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td,
                     cpi->sf.allow_skip_recode;
  
    if (!x->skip_recode && !cpi->sf.use_nonrd_pick_mode)
-    vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
+    memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
  
    x->skip_optimize = ctx->is_coded;
    ctx->is_coded = 1;