vp9-svc: Improve frame dropper for spatial layers.

author Marco Paniconi <marpan@google.com>

Mon, 19 Mar 2018 03:46:30 +0000 (20:46 -0700)

committer Marco Paniconi <marpan@google.com>

Tue, 20 Mar 2018 17:34:45 +0000 (10:34 -0700)
author Marco Paniconi <marpan@google.com>
Mon, 19 Mar 2018 03:46:30 +0000 (20:46 -0700)
committer Marco Paniconi <marpan@google.com>
Tue, 20 Mar 2018 17:34:45 +0000 (10:34 -0700)
diff --git a/examples/vp9_spatial_svc_encoder.c b/examples/vp9_spatial_svc_encoder.c

index 5b98ce7affd9a0fb0d4b8a8d832de82f4a601d9b..e93f0634ef67ecc5accbe67997550cef0462dc9a 100644 (file)
--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -503,10 +503,8 @@ static void printout_rate_control_summary(struct RateControlStats *rc,
    printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
           rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
           perc_fluctuation);
-  if (frame_cnt != tot_num_frames)
-    die("Error: Number of input frames not equal to output encoded frames != "
-        "%d tot_num_frames = %d\n",
-        frame_cnt, tot_num_frames);
+  printf("Num of input, num of encoded (super) frames: %d %d \n", frame_cnt,
+         tot_num_frames);
  }
  
  vpx_codec_err_t parse_superframe_index(const uint8_t *data, size_t data_sz,
@@ -784,8 +782,10 @@ int main(int argc, const char **argv) {
            if (cx_pkt->data.frame.sz > 0) {
  #if OUTPUT_RC_STATS
              uint64_t sizes[8];
+            uint64_t sizes_parsed[8];
              int count = 0;
              vp9_zero(sizes);
+            vp9_zero(sizes_parsed);
  #endif
              vpx_video_writer_write_frame(writer, cx_pkt->data.frame.buf,
                                           cx_pkt->data.frame.sz,
@@ -795,18 +795,21 @@ int main(int argc, const char **argv) {
              if (svc_ctx.output_rc_stat) {
                vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id);
                parse_superframe_index(cx_pkt->data.frame.buf,
-                                     cx_pkt->data.frame.sz, sizes, &count);
+                                     cx_pkt->data.frame.sz, sizes_parsed,
+                                     &count);
                if (enc_cfg.ss_number_layers == 1)
                  sizes[0] = cx_pkt->data.frame.sz;
-              // Note computing input_layer_frames here won't account for frame
-              // drops in rate control stats.
-              // TODO(marpan): Fix this for non-bypass mode so we can get stats
-              // for dropped frames.
                if (svc_ctx.temporal_layering_mode !=
                    VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+                int num_layers_encoded = 0;
                  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
                    ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
                                            layer_id.temporal_layer_id];
+                  sizes[sl] = 0;
+                  if (cx_pkt->data.frame.spatial_layer_encoded[sl]) {
+                    sizes[sl] = sizes_parsed[num_layers_encoded];
+                    num_layers_encoded++;
+                  }
                  }
                }
                for (tl = layer_id.temporal_layer_id;
@@ -817,20 +820,22 @@ int main(int argc, const char **argv) {
                }
  
                for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
-                for (tl = layer_id.temporal_layer_id;
-                     tl < enc_cfg.ts_number_layers; ++tl) {
-                  const int layer = sl * enc_cfg.ts_number_layers + tl;
-                  ++rc.layer_tot_enc_frames[layer];
-                  rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
-                  // Keep count of rate control stats per layer, for non-key
-                  // frames.
-                  if (tl == (unsigned int)layer_id.temporal_layer_id &&
-                      !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
-                    rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl];
-                    rc.layer_avg_rate_mismatch[layer] +=
-                        fabs(8.0 * sizes[sl] - rc.layer_pfb[layer]) /
-                        rc.layer_pfb[layer];
-                    ++rc.layer_enc_frames[layer];
+                if (cx_pkt->data.frame.spatial_layer_encoded[sl]) {
+                  for (tl = layer_id.temporal_layer_id;
+                       tl < enc_cfg.ts_number_layers; ++tl) {
+                    const int layer = sl * enc_cfg.ts_number_layers + tl;
+                    ++rc.layer_tot_enc_frames[layer];
+                    rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
+                    // Keep count of rate control stats per layer, for non-key
+                    // frames.
+                    if (tl == (unsigned int)layer_id.temporal_layer_id &&
+                        !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
+                      rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl];
+                      rc.layer_avg_rate_mismatch[layer] +=
+                          fabs(8.0 * sizes[sl] - rc.layer_pfb[layer]) /
+                          rc.layer_pfb[layer];
+                      ++rc.layer_enc_frames[layer];
+                    }
                    }
                  }
                }
@@ -840,7 +845,8 @@ int main(int argc, const char **argv) {
                // Ignore first window segment, due to key frame.
                if (frame_cnt > (unsigned int)rc.window_size) {
                  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
-                  sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
+                  if (cx_pkt->data.frame.spatial_layer_encoded[sl])
+                    sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
                  }
                  if (frame_cnt % rc.window_size == 0) {
                    rc.window_count += 1;
diff --git a/test/datarate_test.cc b/test/datarate_test.cc

index a506e55301f5b44cc382e1c000e3be9b28b10878..8dab2f17ea5da0f946fa2f55bb1a80dc930283c1 100644 (file)
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -1368,6 +1368,7 @@ class DatarateOnePassCbrSvc
      top_bitrate_ = 0;
      superframe_count_ = -1;
      key_frame_spacing_ = 9999;
+    num_nonref_frames_ = 0;
    }
    virtual void BeginPassHook(unsigned int /*pass*/) {}
  
@@ -1588,7 +1589,9 @@ class DatarateOnePassCbrSvc
  
    virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
      uint32_t sizes[8] = { 0 };
+    uint32_t sizes_parsed[8] = { 0 };
      int count = 0;
+    int num_layers_encoded = 0;
      last_pts_ = pkt->data.frame.pts;
      const bool key_frame =
          (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
@@ -1597,9 +1600,22 @@ class DatarateOnePassCbrSvc
        superframe_count_ = 0;
      }
      parse_superframe_index(static_cast<const uint8_t *>(pkt->data.frame.buf),
-                           pkt->data.frame.sz, sizes, &count);
+                           pkt->data.frame.sz, sizes_parsed, &count);
      // Count may be less than number of spatial layers because of frame drops.
-    ASSERT_LE(count, number_spatial_layers_);
+    for (int sl = 0; sl < number_spatial_layers_; ++sl) {
+      if (pkt->data.frame.spatial_layer_encoded[sl]) {
+        sizes[sl] = sizes_parsed[num_layers_encoded];
+        num_layers_encoded++;
+      }
+    }
+    ASSERT_EQ(count, num_layers_encoded);
+    // Keep track of number of non-reference frames, needed for mismatch check.
+    // Non-reference frames are top spatial and temporal layer frames,
+    // for TL > 0.
+    if (temporal_layer_id_ == number_temporal_layers_ - 1 &&
+        temporal_layer_id_ > 0 &&
+        pkt->data.frame.spatial_layer_encoded[number_spatial_layers_ - 1])
+      num_nonref_frames_++;
      for (int sl = 0; sl < number_spatial_layers_; ++sl) {
        sizes[sl] = sizes[sl] << 3;
        // Update the total encoded bits per layer.
@@ -1678,6 +1694,7 @@ class DatarateOnePassCbrSvc
    int top_bitrate_;
    int superframe_count_;
    int key_frame_spacing_;
+  unsigned int num_nonref_frames_;
  };
  
  // Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 1
@@ -1718,7 +1735,11 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL1TLScreenContent1) {
    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
    CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
                            number_temporal_layers_, file_datarate_, 0.78, 1.15);
-  EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());
+#endif
  }
  
  // Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and
@@ -1761,14 +1782,11 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TL) {
                            layer_target_avg_bandwidth_, bits_in_buffer_model_);
      ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
      CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
-                            number_temporal_layers_, file_datarate_, 0.78,
-                            1.15);
+                            number_temporal_layers_, file_datarate_, 0.75, 1.2);
  #if CONFIG_VP9_DECODER
-    // Number of temporal layers > 1, so half of the frames in this SVC pattern
-    // will be non-reference frame and hence encoder will avoid loopfilter.
-    // Since frame dropper is off, we can expect 200 (half of the sequence)
-    // mismatched frames.
-    EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());
+    // The non-reference frames are expected to be mismatched frames as the
+    // encoder will avoid loopfilter on these frames.
+    EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());
  #endif
    }
  }
@@ -1820,12 +1838,9 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TLDenoiserOn) {
                                number_temporal_layers_, file_datarate_, 0.78,
                                1.15);
  #if CONFIG_VP9_DECODER
-      // Number of temporal layers > 1, so half of the frames in this SVC
-      // pattern
-      // will be non-reference frame and hence encoder will avoid loopfilter.
-      // Since frame dropper is off, we can expect 200 (half of the sequence)
-      // mismatched frames.
-      EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());
+      // The non-reference frames are expected to be mismatched frames as the
+      // encoder will avoid loopfilter on these frames.
+      EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());
  #endif
      }
    }
@@ -1874,6 +1889,11 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TLSmallKf) {
      CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
                              number_temporal_layers_, file_datarate_, 0.78,
                              1.15);
+#if CONFIG_VP9_DECODER
+    // The non-reference frames are expected to be mismatched frames as the
+    // encoder will avoid loopfilter on these frames.
+    EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());
+#endif
    }
  }
  
@@ -1906,21 +1926,21 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TL4Threads) {
    ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
    top_sl_width_ = 1280;
    top_sl_height_ = 720;
-  cfg_.rc_target_bitrate = 800;
-  ResetModel();
-  assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
-                        cfg_.ts_number_layers, cfg_.temporal_layering_mode,
-                        layer_target_avg_bandwidth_, bits_in_buffer_model_);
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
-                          number_temporal_layers_, file_datarate_, 0.78, 1.15);
+  for (int i = 200; i <= 800; i += 300) {
+    cfg_.rc_target_bitrate = i;
+    ResetModel();
+    assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
+                          cfg_.ts_number_layers, cfg_.temporal_layering_mode,
+                          layer_target_avg_bandwidth_, bits_in_buffer_model_);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
+                            number_temporal_layers_, file_datarate_, 0.75, 1.2);
  #if CONFIG_VP9_DECODER
-  // Number of temporal layers > 1, so half of the frames in this SVC pattern
-  // will be non-reference frame and hence encoder will avoid loopfilter.
-  // Since frame dropper is off, we can expect 30 (half of the sequence)
-  // mismatched frames.
-  EXPECT_EQ(static_cast<unsigned int>(30), GetMismatchFrames());
+    // The non-reference frames are expected to be mismatched frames as the
+    // encoder will avoid loopfilter on these frames.
+    EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());
  #endif
+  }
  }
  
  // Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and
@@ -1964,11 +1984,9 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL) {
    CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
                            number_temporal_layers_, file_datarate_, 0.78, 1.15);
  #if CONFIG_VP9_DECODER
-  // Number of temporal layers > 1, so half of the frames in this SVC pattern
-  // will be non-reference frame and hence encoder will avoid loopfilter.
-  // Since frame dropper is off, we can expect 200 (half of the sequence)
-  // mismatched frames.
-  EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());
  #endif
  }
  
@@ -2014,11 +2032,9 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TLDynamicBitrateChange) {
    CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
                            number_temporal_layers_, file_datarate_, 0.78, 1.15);
  #if CONFIG_VP9_DECODER
-  // Number of temporal layers > 1, so half of the frames in this SVC pattern
-  // will be non-reference frame and hence encoder will avoid loopfilter.
-  // Since frame dropper is off, we can expect 200 (half of the sequence)
-  // mismatched frames.
-  EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());
  #endif
  }
  
@@ -2066,11 +2082,9 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL2TLDynamicPatternChange) {
    CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
                            number_temporal_layers_, file_datarate_, 0.78, 1.15);
  #if CONFIG_VP9_DECODER
-  // Number of temporal layers > 1, so half of the frames in this SVC pattern
-  // will be non-reference frame and hence encoder will avoid loopfilter.
-  // Since frame dropper is off, we can expect 200 (half of the sequence)
-  // mismatched frames.
-  EXPECT_EQ(static_cast<unsigned int>(200), GetMismatchFrames());
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());
  #endif
  }
  
@@ -2117,6 +2131,11 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL_DisableEnableLayers) {
    // for part of the sequence.
    CheckLayerRateTargeting(&cfg_, number_spatial_layers_ - 1,
                            number_temporal_layers_, file_datarate_, 0.78, 1.15);
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());
+#endif
  }
  
  // Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and 3
@@ -2164,12 +2183,17 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TLSmallKf) {
      CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
                              number_temporal_layers_, file_datarate_, 0.78,
                              1.15);
+#if CONFIG_VP9_DECODER
+    // The non-reference frames are expected to be mismatched frames as the
+    // encoder will avoid loopfilter on these frames.
+    EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());
+#endif
    }
  }
  
  // Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and
  // 3 temporal layers. Run HD clip with 4 threads.
-TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL4threads) {
+TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL4Threads) {
    cfg_.rc_buf_initial_sz = 500;
    cfg_.rc_buf_optimal_sz = 500;
    cfg_.rc_buf_sz = 1000;
@@ -2198,21 +2222,21 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL4threads) {
    ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
    top_sl_width_ = 1280;
    top_sl_height_ = 720;
-  cfg_.rc_target_bitrate = 800;
-  ResetModel();
-  assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
-                        cfg_.ts_number_layers, cfg_.temporal_layering_mode,
-                        layer_target_avg_bandwidth_, bits_in_buffer_model_);
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
-                          number_temporal_layers_, file_datarate_, 0.78, 1.15);
+  for (int i = 200; i <= 800; i += 300) {
+    cfg_.rc_target_bitrate = i;
+    ResetModel();
+    assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
+                          cfg_.ts_number_layers, cfg_.temporal_layering_mode,
+                          layer_target_avg_bandwidth_, bits_in_buffer_model_);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
+                            number_temporal_layers_, file_datarate_, 0.75, 1.2);
  #if CONFIG_VP9_DECODER
-  // Number of temporal layers > 1, so half of the frames in this SVC pattern
-  // will be non-reference frame and hence encoder will avoid loopfilter.
-  // Since frame dropper is off, we can expect 30 (half of the sequence)
-  // mismatched frames.
-  EXPECT_EQ(static_cast<unsigned int>(30), GetMismatchFrames());
+    // The non-reference frames are expected to be mismatched frames as the
+    // encoder will avoid loopfilter on these frames.
+    EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());
  #endif
+  }
  }
  
  // Run SVC encoder for 1 temporal layer, 2 spatial layers, with spatial
@@ -2258,7 +2282,11 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL1TL5x5MultipleRuns) {
    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
    CheckLayerRateTargeting(&cfg_, number_spatial_layers_,
                            number_temporal_layers_, file_datarate_, 0.78, 1.15);
-  EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+#if CONFIG_VP9_DECODER
+  // The non-reference frames are expected to be mismatched frames as the
+  // encoder will avoid loopfilter on these frames.
+  EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());
+#endif
  }
  
  VP8_INSTANTIATE_TEST_CASE(DatarateTestLarge, ALL_TEST_MODES,
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c

index 21418fde6a759bff0c1c9301c2922d09357896de..2bdc2b34a7bd837a00e1bb519a96bafdbcba20d3 100644 (file)
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -917,7 +917,7 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
          pkt.data.frame.flags = lib_flags << 16;
          pkt.data.frame.width[0] = cpi->common.Width;
          pkt.data.frame.height[0] = cpi->common.Height;
-        pkt.data.frame.last_spatial_layer_encoded = 0;
+        pkt.data.frame.spatial_layer_encoded[0] = 1;
  
          if (lib_flags & FRAMEFLAGS_KEY) {
            pkt.data.frame.flags |= VPX_FRAME_IS_KEY;
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c

index 6fca2307457e977d0e57db3e0b2f3d550fa9491d..128574adf2cef2711bab512821933ee1f0a901b1 100644 (file)
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -3751,6 +3751,24 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
  
    suppress_active_map(cpi);
  
+  // For SVC on non-zero spatial layer: if the previous spatial layer
+  // was dropped then disable the prediction from this (scaled) reference.
+  if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
+      cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id - 1]) {
+    MV_REFERENCE_FRAME ref_frame;
+    static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
+                                      VP9_ALT_FLAG };
+    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+      const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
+      if (yv12 != NULL && (cpi->ref_frame_flags & flag_list[ref_frame])) {
+        const struct scale_factors *const scale_fac =
+            &cm->frame_refs[ref_frame - 1].sf;
+        if (vp9_is_scaled(scale_fac))
+          cpi->ref_frame_flags &= (~flag_list[ref_frame]);
+      }
+    }
+  }
+
    // Variance adaptive and in frame q adjustment experiments are mutually
    // exclusive.
    if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
@@ -4504,6 +4522,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
      vp9_rc_postencode_update_drop_frame(cpi);
      vp9_inc_frame_in_layer(cpi);
      cpi->ext_refresh_frame_flags_pending = 0;
+    cpi->last_frame_dropped = 1;
+    cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1;
+    cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1;
      return;
    }
  
@@ -4591,28 +4612,31 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
    }
  
    // For 1 pass CBR, check if we are dropping this frame.
-  // For spatial layers, for now if we decide to drop current spatial
-  // layer then we will also drop all upper spatial layers.
-  // TODO(marpan): Allow for the case of dropping single layer only without
-  // dropping all upper layers.
+  // Never drop on key frame, or if base layer is key for svc.
    if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR &&
-      cm->frame_type != KEY_FRAME) {
-    if (vp9_rc_drop_frame(cpi) ||
-        (is_one_pass_cbr_svc(cpi) &&
-         cpi->svc.rc_drop_spatial_layer[cpi->svc.spatial_layer_id] == 1)) {
+      cm->frame_type != KEY_FRAME &&
+      (!cpi->use_svc ||
+       !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
+    if (vp9_rc_drop_frame(cpi)) {
        vp9_rc_postencode_update_drop_frame(cpi);
        cpi->ext_refresh_frame_flags_pending = 0;
        cpi->last_frame_dropped = 1;
+      cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1;
        if (cpi->use_svc) {
-        int i;
-        // If we are dropping this spatial layer, then we will drop all
-        // upper spatial layers.
-        for (i = cpi->svc.spatial_layer_id; i < cpi->svc.number_spatial_layers;
-             i++)
-          cpi->svc.rc_drop_spatial_layer[i] = 1;
+        cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1;
          vp9_inc_frame_in_layer(cpi);
-        if (cpi->svc.rc_drop_spatial_layer[0] == 0)
-          cpi->svc.skip_enhancement_layer = 1;
+        cpi->svc.skip_enhancement_layer = 1;
+        if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) {
+          int i;
+          int all_layers_drop = 1;
+          for (i = 0; i < cpi->svc.spatial_layer_id; i++) {
+            if (cpi->svc.drop_spatial_layer[i] == 0) {
+              all_layers_drop = 0;
+              break;
+            }
+          }
+          if (all_layers_drop == 1) cpi->svc.skip_enhancement_layer = 0;
+        }
        }
        return;
      }
@@ -4632,7 +4656,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
    }
  
    cpi->last_frame_dropped = 0;
-  cpi->svc.last_layer_encoded = cpi->svc.spatial_layer_id;
+  cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 0;
  
    // Disable segmentation if it decrease rate/distortion ratio
    if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c

index a34eaf86929ed598cc7d6ae88e41bf06f598e84c..46c917b8e0256a2552f038f5cde73edbabbf63d6 100644 (file)
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -396,9 +396,7 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
  int vp9_rc_drop_frame(VP9_COMP *cpi) {
    const VP9EncoderConfig *oxcf = &cpi->oxcf;
    RATE_CONTROL *const rc = &cpi->rc;
-  if (!oxcf->drop_frames_water_mark ||
-      (is_one_pass_cbr_svc(cpi) &&
-       cpi->svc.rc_drop_spatial_layer[cpi->svc.spatial_layer_id] == 1)) {
+  if (!oxcf->drop_frames_water_mark) {
      return 0;
    } else {
      if (rc->buffer_level < 0) {
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c

index c5eae626333e5e2e4a431043d55888c40c25c6ef..cfa6aa4035c6bd108b2b0e6827f616c3166aac73 100644 (file)
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -596,7 +596,8 @@ static void set_rt_speed_feature_framesize_independent(
      if (!cpi->last_frame_dropped && cpi->resize_state == ORIG &&
          !cpi->external_resize &&
          (!cpi->use_svc ||
-         cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) {
+         (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1 &&
+          !cpi->svc.last_layer_dropped[cpi->svc.number_spatial_layers - 1]))) {
        sf->copy_partition_flag = 1;
        cpi->max_copied_frame = 2;
        // The top temporal enhancement layer (for number of temporal layers > 1)
@@ -666,6 +667,11 @@ static void set_rt_speed_feature_framesize_independent(
            (uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
                                  sizeof(*cpi->count_lastgolden_frame_usage));
    }
+  // Disable adaptive_rd_thresh for row_mt for SVC with frame dropping.
+  // This is causing some tests to fail.
+  // TODO(marpan/jianj): Look into this failure and re-enable later.
+  if (cpi->use_svc && cpi->oxcf.drop_frames_water_mark)
+    sf->adaptive_rd_thresh_row_mt = 0;
  }
  
  void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c

index 2b68047c51b7390aa66d35ab017fbea0346e418d..1957bb9ece6a702df3aafbd00aa91b18b0ef364e 100644 (file)
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -38,11 +38,11 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
    svc->current_superframe = 0;
    svc->non_reference_frame = 0;
    svc->skip_enhancement_layer = 0;
-  svc->last_layer_encoded = 0;
  
    for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1;
    for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
-    svc->rc_drop_spatial_layer[sl] = 0;
+    svc->last_layer_dropped[sl] = 0;
+    svc->drop_spatial_layer[sl] = 0;
      svc->ext_frame_flags[sl] = 0;
      svc->ext_lst_fb_idx[sl] = 0;
      svc->ext_gld_fb_idx[sl] = 1;
@@ -649,11 +649,12 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
      }
    }
  
-  // Reset the drop flags for all spatial lauyers, on the base layer.
+  // Reset the drop flags for all spatial layers, on the base layer.
    if (cpi->svc.spatial_layer_id == 0) {
      int i;
-    for (i = 0; i < cpi->svc.number_spatial_layers; i++)
-      cpi->svc.rc_drop_spatial_layer[i] = 0;
+    for (i = 0; i < cpi->svc.number_spatial_layers; i++) {
+      cpi->svc.drop_spatial_layer[i] = 0;
+    }
    }
  
    lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id *
@@ -702,6 +703,13 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
          break;
        }
      }
+    // For non-zero spatial layers: if the previous spatial layer was dropped
+    // disable the base_mv and partition_reuse features.
+    if (cpi->svc.spatial_layer_id > 0 &&
+        cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id - 1]) {
+      cpi->svc.use_base_mv = 0;
+      cpi->svc.use_partition_reuse = 0;
+    }
    }
  
    cpi->svc.non_reference_frame = 0;
diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h

index 0addb57d8819034c25d0027e43228edac01ef999..9bf62ee61de3e58e8bcd122ee8d7bf76584069d6 100644 (file)
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -57,7 +57,6 @@ typedef struct SVC {
  
    int spatial_layer_to_encode;
    int first_spatial_layer_to_encode;
-  int rc_drop_spatial_layer[VPX_MAX_LAYERS];
  
    // Workaround for multiple frame contexts
    enum { ENCODED = 0, ENCODING, NEED_TO_ENCODE } encode_empty_frame_state;
@@ -107,7 +106,8 @@ typedef struct SVC {
  
    int lower_layer_qindex;
  
-  int last_layer_encoded;
+  int last_layer_dropped[VPX_MAX_LAYERS];
+  int drop_spatial_layer[VPX_MAX_LAYERS];
  } SVC;
  
  struct VP9_COMP;
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c

index 387d6dc9aeb4edf61bd5970b493d0f0115ece7c7..a2c5dc83e3d41ca38caaae90820f5fabf4564032 100644 (file)
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -1205,8 +1205,8 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
            cx_data_sz -= size;
            pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;
            pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height;
-          pkt.data.frame.last_spatial_layer_encoded =
-              cpi->svc.last_layer_encoded;
+          pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] =
+              1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id];
  
            if (ctx->output_cx_pkt_cb.output_cx_pkt) {
              pkt.kind = VPX_CODEC_CX_FRAME_PKT;
@@ -1235,7 +1235,8 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
          pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
          pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;
          pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height;
-        pkt.data.frame.last_spatial_layer_encoded = cpi->svc.last_layer_encoded;
+        pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] =
+            1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id];
  
          if (ctx->pending_cx_data) {
            if (size) ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h

index dfdebe35668c1f2cb7bb1bda06ba529fbd8eccf0..26eb054551b01cf611201429010ccc97c2c25bb9 100644 (file)
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -63,7 +63,7 @@ extern "C" {
   * fields to structures
   */
  #define VPX_ENCODER_ABI_VERSION \
-  (8 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/
+  (9 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/
  
  /*! \brief Encoder capabilities bitfield
   *
@@ -181,9 +181,9 @@ typedef struct vpx_codec_cx_pkt {
         * first one.*/
        unsigned int width[VPX_SS_MAX_LAYERS];  /**< frame width */
        unsigned int height[VPX_SS_MAX_LAYERS]; /**< frame height */
-      /*!\brief Last spatial layer frame in this packet. VP8 will always be set
-       * to 0.*/
-      unsigned int last_spatial_layer_encoded;
+      /*!\brief Flag to indicate if spatial layer frame in this packet is
+       * encoded or dropped. VP8 will always be set to 1.*/
+      uint8_t spatial_layer_encoded[VPX_SS_MAX_LAYERS];
      } frame;                            /**< data for compressed frame packet */
      vpx_fixed_buf_t twopass_stats;      /**< data for two-pass packet */
      vpx_fixed_buf_t firstpass_mb_stats; /**< first pass mb packet */
author	Marco Paniconi <marpan@google.com>
	Mon, 19 Mar 2018 03:46:30 +0000 (20:46 -0700)
committer	Marco Paniconi <marpan@google.com>
	Tue, 20 Mar 2018 17:34:45 +0000 (10:34 -0700)
examples/vp9_spatial_svc_encoder.c		patch \| blob \| history
test/datarate_test.cc		patch \| blob \| history
vp8/vp8_cx_iface.c		patch \| blob \| history
vp9/encoder/vp9_encoder.c		patch \| blob \| history
vp9/encoder/vp9_ratectrl.c		patch \| blob \| history
vp9/encoder/vp9_speed_features.c		patch \| blob \| history
vp9/encoder/vp9_svc_layercontext.c		patch \| blob \| history
vp9/encoder/vp9_svc_layercontext.h		patch \| blob \| history
vp9/vp9_cx_iface.c		patch \| blob \| history
vpx/vpx_encoder.h		patch \| blob \| history