From 5cc8df5bcfe49fbfca21ee57401c7807c048751b Mon Sep 17 00:00:00 2001
From: Marco Paniconi <marpan@google.com>
Date: Wed, 4 Apr 2018 16:24:39 -0700
Subject: [PATCH] vp9-svc: Fix to first superframe when inter_layer is off.

When the application selects the setting INTER_LAYER_PRED_OFF
each spatial stream should be decodable separately.
For this we need to force key frames on all spatial layers
on the first superframe.

In order to maintain the quality at the beginning of the stream
the active_worst for each spatial layer of the second superframe is set
to the last_QP of the corresponding spatial layer of the first superframe.
Also make sure nonrd_keyframe is set for non-base spatial layers.

Change only affects SVC mode with number_spatial_layers > 1 and
svc->disable_inter_layer_pred == INTER_LAYER_PRED_OFF.
And only affects first and second frame of sequence.

Change-Id: I8ee9a0873ab1d3a02515774571f719617771ad41
---
 vp9/encoder/vp9_ratectrl.c       | 31 ++++++++++++++++++++++++++++---
 vp9/encoder/vp9_speed_features.c |  3 +++
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index b5f1a5c5c..d7f677d66 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -702,6 +702,14 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
   int active_worst_quality;
   int ambient_qp;
   unsigned int num_frames_weight_key = 5 * cpi->svc.number_temporal_layers;
+  // For SVC: when inter layer prediction is off, on the second superframe
+  // (superframe = 1) use the QP from the previous superframe. This is needed to
+  // maintain quality since we can't temporally predict from the very first
+  // superframe.
+  if (cpi->use_svc && cpi->svc.current_superframe == 1 &&
+      cpi->svc.number_spatial_layers > 1 &&
+      cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF)
+    return rc->last_q[KEY_FRAME];
   if (cm->frame_type == KEY_FRAME || rc->reset_high_source_sad)
     return rc->worst_quality;
   // For ambient_qp we use minimum of avg_frame_qindex[KEY_FRAME/INTER_FRAME]
@@ -1727,8 +1735,13 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
       LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, cpi->svc.temporal_layer_id,
                        cpi->svc.number_temporal_layers);
   // Periodic key frames is based on the super-frame counter
-  // (svc.current_superframe), also only base spatial layer is key frame.
-  if ((cm->current_video_frame == 0) || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
+  // (svc.current_superframe), also only base spatial layer is key frame unless
+  // svc.disable_inter_layer_pred = INTER_LAYER_PRED_OFF.
+  int first_frame_iskey =
+      (cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF)
+          ? (cpi->svc.current_superframe == 0)
+          : (cm->current_video_frame == 0);
+  if (first_frame_iskey || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
       (cpi->oxcf.auto_key &&
        (cpi->svc.current_superframe % cpi->oxcf.key_freq == 0) &&
        cpi->svc.spatial_layer_id == 0)) {
@@ -1738,7 +1751,7 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
       cpi->svc.layer_context[layer].is_key_frame = 1;
       cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
     } else if (is_one_pass_cbr_svc(cpi)) {
-      if (cm->current_video_frame > 0) vp9_svc_reset_key_frame(cpi);
+      if (!first_frame_iskey) vp9_svc_reset_key_frame(cpi);
       layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,
                                cpi->svc.temporal_layer_id,
                                cpi->svc.number_temporal_layers);
@@ -1750,6 +1763,18 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
     }
   } else {
     cm->frame_type = INTER_FRAME;
+    // Special case for first superframe when inter_layer prediction is off.
+    // Force key frame on base spatial layer for second superframe.
+    // TODO(marpan): This condition may be removed, depending on #spatial_layer
+    // and scaling factor, but keep it for now for simplicity.
+    if (cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF &&
+        cpi->svc.current_superframe == 1 && cpi->svc.spatial_layer_id == 0 &&
+        cpi->svc.number_spatial_layers > 1) {
+      vp9_svc_reset_key_frame(cpi);
+      cm->frame_type = KEY_FRAME;
+      cpi->svc.layer_context[layer].is_key_frame = 1;
+      target = calc_iframe_target_size_one_pass_cbr(cpi);
+    }
     if (is_two_pass_svc(cpi)) {
       LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
       if (cpi->svc.spatial_layer_id == 0) {
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 879cd2070..746fa36e9 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -531,6 +531,9 @@ static void set_rt_speed_feature_framesize_independent(
       sf->limit_newmv_early_exit = 1;
       if (!cpi->use_svc) sf->bias_golden = 1;
     }
+    // Keep nonrd_keyframe = 1 for non-base spatial layers to prevent large
+    // encoding time.
+    if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) sf->nonrd_keyframe = 1;
   }
 
   if (speed >= 6) {
-- 
2.40.0