vp9: Allow for overshoot detection for non-screen CBR mode.

author Marco Paniconi <marpan@google.com>

Wed, 8 Aug 2018 21:01:26 +0000 (14:01 -0700)

committer Marco Paniconi <marpan@google.com>

Fri, 10 Aug 2018 00:38:20 +0000 (17:38 -0700)
author Marco Paniconi <marpan@google.com>
Wed, 8 Aug 2018 21:01:26 +0000 (14:01 -0700)
committer Marco Paniconi <marpan@google.com>
Fri, 10 Aug 2018 00:38:20 +0000 (17:38 -0700)
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c

index 3db11fcb0269e945304f71e4d233b49ed2db7bb2..335a0b292ceb800f92da1404018837ac66fc8450 100644 (file)
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -4052,13 +4052,12 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
      vp9_svc_assert_constraints_pattern(cpi);
    }
  
-  if (!cpi->sf.re_encode_overshoot_rt &&
-      cpi->oxcf.content == VP9E_CONTENT_SCREEN &&
+  // Check if this high_source_sad (scene/slide change) frame should be
+  // encoded at high/max QP, and if so, set the q and adjust some rate
+  // control parameters.
+  if (cpi->sf.overshoot_detection_rt == 1 &&
        (cpi->rc.high_source_sad ||
         (cpi->use_svc && cpi->svc.high_source_sad_superframe))) {
-    // Check if this high_source_sad (scene/slide change) frame should be
-    // encoded at high/max QP, and if so, set the q and adjust some rate
-    // control parameters.
      if (vp9_encodedframe_overshoot(cpi, -1, &q)) {
        vp9_set_quantizer(cm, q);
        vp9_set_variance_partition_thresholds(cpi, q, 0);
@@ -4087,10 +4086,11 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
  
    vp9_encode_frame(cpi);
  
-  // Check if we should drop this frame because of high overshoot.
-  // Only for frames where high temporal-source SAD is detected.
+  // Check if we should re-encode this frame at high Q because of high
+  // overshoot based on the encoded frame size. Only for frames where
+  // high temporal-source SAD is detected.
    // For SVC: all spatial layers are checked for re-encoding.
-  if (cpi->sf.re_encode_overshoot_rt &&
+  if (cpi->sf.overshoot_detection_rt == 2 &&
        (cpi->rc.high_source_sad ||
         (cpi->use_svc && cpi->svc.high_source_sad_superframe))) {
      int frame_size = 0;
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c

index e0c877833318c41c0027de364572b7ff5da9310c..64db18f09687c3e6c443894c1945764353eddda4 100644 (file)
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -2648,10 +2648,8 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) {
      float thresh = 8.0f;
      uint32_t thresh_key = 140000;
      if (cpi->oxcf.speed <= 5) thresh_key = 240000;
-    if (cpi->oxcf.rc_mode == VPX_VBR) {
-      min_thresh = 65000;
-      thresh = 2.1f;
-    }
+    if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) min_thresh = 65000;
+    if (cpi->oxcf.rc_mode == VPX_VBR) thresh = 2.1f;
      if (cpi->use_svc && cpi->svc.number_spatial_layers > 1) {
        const int aligned_width = ALIGN_POWER_OF_TWO(src_width, MI_SIZE_LOG2);
        const int aligned_height = ALIGN_POWER_OF_TWO(src_height, MI_SIZE_LOG2);
@@ -2822,14 +2820,16 @@ int vp9_encodedframe_overshoot(VP9_COMP *cpi, int frame_size, int *q) {
    SPEED_FEATURES *const sf = &cpi->sf;
    int thresh_qp = 7 * (rc->worst_quality >> 3);
    int thresh_rate = rc->avg_frame_bandwidth << 3;
-  // Lower rate threshold for video.
+  // Lower thresh_qp for video (more overshoot at lower Q) to be
+  // more conservative for video.
    if (cpi->oxcf.content != VP9E_CONTENT_SCREEN)
-    thresh_rate = rc->avg_frame_bandwidth << 2;
+    thresh_qp = rc->worst_quality >> 1;
    // If this decision is not based on an encoded frame size but just on
-  // scene/slide change detection (i.e., re_encode_overshoot_rt = 0), adjust the
-  // qp_thresh and skip the (frame_size > thresh_rate) condition in this case.
-  if (!sf->re_encode_overshoot_rt) thresh_qp = 3 * (rc->worst_quality >> 2);
-  if ((!sf->re_encode_overshoot_rt || frame_size > thresh_rate) &&
+  // scene/slide change detection (i.e., overshoot_detection_rt = 1),
+  // for now skip the (frame_size > thresh_rate) condition in this case.
+  // TODO(marpan): Use a better size/rate condition for this case and
+  // adjust thresholds.
+  if ((sf->overshoot_detection_rt == 1 || frame_size > thresh_rate) &&
        cm->base_qindex < thresh_qp) {
      double rate_correction_factor =
          cpi->rc.rate_correction_factors[INTER_NORMAL];
@@ -2846,7 +2846,7 @@ int vp9_encodedframe_overshoot(VP9_COMP *cpi, int frame_size, int *q) {
      // and the encoded frame used alot of Intra modes, then force hybrid_intra
      // encoding for the re-encode on this scene change. hybrid_intra will
      // use rd-based intra mode selection for small blocks.
-    if (sf->re_encode_overshoot_rt && frame_size > (thresh_rate << 1) &&
+    if (sf->overshoot_detection_rt == 2 && frame_size > (thresh_rate << 1) &&
          cpi->svc.spatial_layer_id == 0) {
        MODE_INFO **mi = cm->mi_grid_visible;
        int sum_intra_usage = 0;
@@ -2900,8 +2900,8 @@ int vp9_encodedframe_overshoot(VP9_COMP *cpi, int frame_size, int *q) {
          LAYER_CONTEXT *lc = &svc->layer_context[layer];
          RATE_CONTROL *lrc = &lc->rc;
          lrc->avg_frame_qindex[INTER_FRAME] = *q;
-        lrc->buffer_level = rc->optimal_buffer_level;
-        lrc->bits_off_target = rc->optimal_buffer_level;
+        lrc->buffer_level = lrc->optimal_buffer_level;
+        lrc->bits_off_target = lrc->optimal_buffer_level;
          lrc->rc_1_frame = 0;
          lrc->rc_2_frame = 0;
          lrc->rate_correction_factors[INTER_NORMAL] = rate_correction_factor;
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c

index 859e626bb0411ff6f579a0a8315a186d7323a179..107da219475922a71a8a2bd2d738adcde254b277 100644 (file)
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -405,7 +405,7 @@ static void set_rt_speed_feature_framesize_independent(
    sf->use_compound_nonrd_pickmode = 0;
    sf->nonrd_keyframe = 0;
    sf->svc_use_lowres_part = 0;
-  sf->re_encode_overshoot_rt = 0;
+  sf->overshoot_detection_rt = 0;
    sf->disable_16x16part_nonkey = 0;
    sf->disable_golden_ref = 0;
    sf->enable_tpl_model = 0;
@@ -570,11 +570,9 @@ static void set_rt_speed_feature_framesize_independent(
      // Keep nonrd_keyframe = 1 for non-base spatial layers to prevent
      // increase in encoding time.
      if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) sf->nonrd_keyframe = 1;
-    if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
-        cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG &&
-        (cpi->use_svc || cpi->oxcf.content == VP9E_CONTENT_SCREEN)) {
-      sf->re_encode_overshoot_rt = 1;
-    }
+    if (cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG &&
+        cpi->oxcf.rc_mode == VPX_CBR)
+      sf->overshoot_detection_rt = 1;
      if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0 &&
          cm->width <= 1280 && cm->height <= 720) {
        sf->use_altref_onepass = 1;
@@ -583,7 +581,6 @@ static void set_rt_speed_feature_framesize_independent(
    }
  
    if (speed >= 6) {
-    sf->re_encode_overshoot_rt = 0;
      if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0) {
        sf->use_altref_onepass = 1;
        sf->use_compound_nonrd_pickmode = 1;
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h

index 89fc82ebf6ee929be398192308a3a706a1dcfdbb..7430f0a33f62e0451dc1f238f6bf3872aa6d5f07 100644 (file)
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -542,9 +542,17 @@ typedef struct SPEED_FEATURES {
    // For SVC: enables use of partition from lower spatial resolution.
    int svc_use_lowres_part;
  
-  // Enable re-encoding on scene change with potential high overshoot,
-  // for real-time encoding flow.
-  int re_encode_overshoot_rt;
+  // Flag to indicate process for handling overshoot on slide/scene change,
+  // for real-time CBR mode.
+  // 0: no reaction to rate control on a detected slide/scene change
+  // (prior to encoding the frame).
+  // 1: set to larger Q based only on the detected slide/scene change
+  // and current/past Q. No second pass encoding, so faster than option 2.
+  // 2: based on (first pass) encoded frame, if large frame size is detected
+  // then set to higher Q for second encode. This involves 2 pass encoding
+  // on slide change, so slower than 1, but more accurate for detecting
+  // overshoot.
+  int overshoot_detection_rt;
  
    // Disable partitioning of 16x16 blocks.
    int disable_16x16part_nonkey;
author	Marco Paniconi <marpan@google.com>
	Wed, 8 Aug 2018 21:01:26 +0000 (14:01 -0700)
committer	Marco Paniconi <marpan@google.com>
	Fri, 10 Aug 2018 00:38:20 +0000 (17:38 -0700)
vp9/encoder/vp9_encoder.c		patch \| blob \| history
vp9/encoder/vp9_ratectrl.c		patch \| blob \| history
vp9/encoder/vp9_speed_features.c		patch \| blob \| history
vp9/encoder/vp9_speed_features.h		patch \| blob \| history