From: Marco Paniconi Date: Mon, 19 Mar 2018 03:46:30 +0000 (-0700) Subject: vp9-svc: Improve frame dropper for spatial layers. X-Git-Tag: v1.8.0~787^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=126a3718fcada02b47e9eaa3210aa7b71694a0b1;p=libvpx vp9-svc: Improve frame dropper for spatial layers. SVC frame dropper: modify the logic to allow for individual spatial layers to drop. This removes the constraint that all upper spatial layers must drop when a given spatial layer drops. Add a flag to the pkt to indicate whether a spatial layer is encoded or dropped. This is needed for applications that enable this feature (frame dropping for SVC). For a current spatial layer, if its previous spatial layer is dropped, then disable certain features for that layer: inter-layer prediction, base_mv, partition_reuse, copy partition. Also add the constraint to never drop a spatial layer if its base layer is a key frame. Updates to sample encoder (vp9_spatial_svc_encoder) and the SVC datarate unittests to properly handle frame dropping. Bump up ABI version. 
Change-Id: I7d14ccf67b8d014a7abfce5ba3989fc623e94067 --- diff --git a/examples/vp9_spatial_svc_encoder.c b/examples/vp9_spatial_svc_encoder.c index 5b98ce7af..e93f0634e 100644 --- a/examples/vp9_spatial_svc_encoder.c +++ b/examples/vp9_spatial_svc_encoder.c @@ -503,10 +503,8 @@ static void printout_rate_control_summary(struct RateControlStats *rc, printf("Average, rms-variance, and percent-fluct: %f %f %f \n", rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate), perc_fluctuation); - if (frame_cnt != tot_num_frames) - die("Error: Number of input frames not equal to output encoded frames != " - "%d tot_num_frames = %d\n", - frame_cnt, tot_num_frames); + printf("Num of input, num of encoded (super) frames: %d %d \n", frame_cnt, + tot_num_frames); } vpx_codec_err_t parse_superframe_index(const uint8_t *data, size_t data_sz, @@ -784,8 +782,10 @@ int main(int argc, const char **argv) { if (cx_pkt->data.frame.sz > 0) { #if OUTPUT_RC_STATS uint64_t sizes[8]; + uint64_t sizes_parsed[8]; int count = 0; vp9_zero(sizes); + vp9_zero(sizes_parsed); #endif vpx_video_writer_write_frame(writer, cx_pkt->data.frame.buf, cx_pkt->data.frame.sz, @@ -795,18 +795,21 @@ int main(int argc, const char **argv) { if (svc_ctx.output_rc_stat) { vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id); parse_superframe_index(cx_pkt->data.frame.buf, - cx_pkt->data.frame.sz, sizes, &count); + cx_pkt->data.frame.sz, sizes_parsed, + &count); if (enc_cfg.ss_number_layers == 1) sizes[0] = cx_pkt->data.frame.sz; - // Note computing input_layer_frames here won't account for frame - // drops in rate control stats. - // TODO(marpan): Fix this for non-bypass mode so we can get stats - // for dropped frames. 
if (svc_ctx.temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { + int num_layers_encoded = 0; for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers + layer_id.temporal_layer_id]; + sizes[sl] = 0; + if (cx_pkt->data.frame.spatial_layer_encoded[sl]) { + sizes[sl] = sizes_parsed[num_layers_encoded]; + num_layers_encoded++; + } } } for (tl = layer_id.temporal_layer_id; @@ -817,20 +820,22 @@ int main(int argc, const char **argv) { } for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { - for (tl = layer_id.temporal_layer_id; - tl < enc_cfg.ts_number_layers; ++tl) { - const int layer = sl * enc_cfg.ts_number_layers + tl; - ++rc.layer_tot_enc_frames[layer]; - rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl]; - // Keep count of rate control stats per layer, for non-key - // frames. - if (tl == (unsigned int)layer_id.temporal_layer_id && - !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) { - rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl]; - rc.layer_avg_rate_mismatch[layer] += - fabs(8.0 * sizes[sl] - rc.layer_pfb[layer]) / - rc.layer_pfb[layer]; - ++rc.layer_enc_frames[layer]; + if (cx_pkt->data.frame.spatial_layer_encoded[sl]) { + for (tl = layer_id.temporal_layer_id; + tl < enc_cfg.ts_number_layers; ++tl) { + const int layer = sl * enc_cfg.ts_number_layers + tl; + ++rc.layer_tot_enc_frames[layer]; + rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl]; + // Keep count of rate control stats per layer, for non-key + // frames. + if (tl == (unsigned int)layer_id.temporal_layer_id && + !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) { + rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl]; + rc.layer_avg_rate_mismatch[layer] += + fabs(8.0 * sizes[sl] - rc.layer_pfb[layer]) / + rc.layer_pfb[layer]; + ++rc.layer_enc_frames[layer]; + } } } } @@ -840,7 +845,8 @@ int main(int argc, const char **argv) { // Ignore first window segment, due to key frame. 
if (frame_cnt > (unsigned int)rc.window_size) { for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { - sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate; + if (cx_pkt->data.frame.spatial_layer_encoded[sl]) + sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate; } if (frame_cnt % rc.window_size == 0) { rc.window_count += 1; diff --git a/test/datarate_test.cc b/test/datarate_test.cc index a506e5530..8dab2f17e 100644 --- a/test/datarate_test.cc +++ b/test/datarate_test.cc @@ -1368,6 +1368,7 @@ class DatarateOnePassCbrSvc top_bitrate_ = 0; superframe_count_ = -1; key_frame_spacing_ = 9999; + num_nonref_frames_ = 0; } virtual void BeginPassHook(unsigned int /*pass*/) {} @@ -1588,7 +1589,9 @@ class DatarateOnePassCbrSvc virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { uint32_t sizes[8] = { 0 }; + uint32_t sizes_parsed[8] = { 0 }; int count = 0; + int num_layers_encoded = 0; last_pts_ = pkt->data.frame.pts; const bool key_frame = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false; @@ -1597,9 +1600,22 @@ class DatarateOnePassCbrSvc superframe_count_ = 0; } parse_superframe_index(static_cast(pkt->data.frame.buf), - pkt->data.frame.sz, sizes, &count); + pkt->data.frame.sz, sizes_parsed, &count); // Count may be less than number of spatial layers because of frame drops. - ASSERT_LE(count, number_spatial_layers_); + for (int sl = 0; sl < number_spatial_layers_; ++sl) { + if (pkt->data.frame.spatial_layer_encoded[sl]) { + sizes[sl] = sizes_parsed[num_layers_encoded]; + num_layers_encoded++; + } + } + ASSERT_EQ(count, num_layers_encoded); + // Keep track of number of non-reference frames, needed for mismatch check. + // Non-reference frames are top spatial and temporal layer frames, + // for TL > 0. 
+ if (temporal_layer_id_ == number_temporal_layers_ - 1 && + temporal_layer_id_ > 0 && + pkt->data.frame.spatial_layer_encoded[number_spatial_layers_ - 1]) + num_nonref_frames_++; for (int sl = 0; sl < number_spatial_layers_; ++sl) { sizes[sl] = sizes[sl] << 3; // Update the total encoded bits per layer. @@ -1678,6 +1694,7 @@ class DatarateOnePassCbrSvc int top_bitrate_; int superframe_count_; int key_frame_spacing_; + unsigned int num_nonref_frames_; }; // Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 1 @@ -1718,7 +1735,11 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL1TLScreenContent1) { ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); CheckLayerRateTargeting(&cfg_, number_spatial_layers_, number_temporal_layers_, file_datarate_, 0.78, 1.15); - EXPECT_EQ(static_cast(0), GetMismatchFrames()); +#if CONFIG_VP9_DECODER + // The non-reference frames are expected to be mismatched frames as the + // encoder will avoid loopfilter on these frames. + EXPECT_EQ(num_nonref_frames_, GetMismatchFrames()); +#endif } // Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and @@ -1761,14 +1782,11 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TL) { layer_target_avg_bandwidth_, bits_in_buffer_model_); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); CheckLayerRateTargeting(&cfg_, number_spatial_layers_, - number_temporal_layers_, file_datarate_, 0.78, - 1.15); + number_temporal_layers_, file_datarate_, 0.75, 1.2); #if CONFIG_VP9_DECODER - // Number of temporal layers > 1, so half of the frames in this SVC pattern - // will be non-reference frame and hence encoder will avoid loopfilter. - // Since frame dropper is off, we can expect 200 (half of the sequence) - // mismatched frames. - EXPECT_EQ(static_cast(200), GetMismatchFrames()); + // The non-reference frames are expected to be mismatched frames as the + // encoder will avoid loopfilter on these frames. 
+ EXPECT_EQ(num_nonref_frames_, GetMismatchFrames()); #endif } } @@ -1820,12 +1838,9 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TLDenoiserOn) { number_temporal_layers_, file_datarate_, 0.78, 1.15); #if CONFIG_VP9_DECODER - // Number of temporal layers > 1, so half of the frames in this SVC - // pattern - // will be non-reference frame and hence encoder will avoid loopfilter. - // Since frame dropper is off, we can expect 200 (half of the sequence) - // mismatched frames. - EXPECT_EQ(static_cast(200), GetMismatchFrames()); + // The non-reference frames are expected to be mismatched frames as the + // encoder will avoid loopfilter on these frames. + EXPECT_EQ(num_nonref_frames_, GetMismatchFrames()); #endif } } @@ -1874,6 +1889,11 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TLSmallKf) { CheckLayerRateTargeting(&cfg_, number_spatial_layers_, number_temporal_layers_, file_datarate_, 0.78, 1.15); +#if CONFIG_VP9_DECODER + // The non-reference frames are expected to be mismatched frames as the + // encoder will avoid loopfilter on these frames. 
+ EXPECT_EQ(num_nonref_frames_, GetMismatchFrames()); +#endif } } @@ -1906,21 +1926,21 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TL4Threads) { ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); top_sl_width_ = 1280; top_sl_height_ = 720; - cfg_.rc_target_bitrate = 800; - ResetModel(); - assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers, - cfg_.ts_number_layers, cfg_.temporal_layering_mode, - layer_target_avg_bandwidth_, bits_in_buffer_model_); - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - CheckLayerRateTargeting(&cfg_, number_spatial_layers_, - number_temporal_layers_, file_datarate_, 0.78, 1.15); + for (int i = 200; i <= 800; i += 300) { + cfg_.rc_target_bitrate = i; + ResetModel(); + assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers, + cfg_.ts_number_layers, cfg_.temporal_layering_mode, + layer_target_avg_bandwidth_, bits_in_buffer_model_); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + CheckLayerRateTargeting(&cfg_, number_spatial_layers_, + number_temporal_layers_, file_datarate_, 0.75, 1.2); #if CONFIG_VP9_DECODER - // Number of temporal layers > 1, so half of the frames in this SVC pattern - // will be non-reference frame and hence encoder will avoid loopfilter. - // Since frame dropper is off, we can expect 30 (half of the sequence) - // mismatched frames. - EXPECT_EQ(static_cast(30), GetMismatchFrames()); + // The non-reference frames are expected to be mismatched frames as the + // encoder will avoid loopfilter on these frames. 
+ EXPECT_EQ(num_nonref_frames_, GetMismatchFrames()); #endif + } } // Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and @@ -1964,11 +1984,9 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL) { CheckLayerRateTargeting(&cfg_, number_spatial_layers_, number_temporal_layers_, file_datarate_, 0.78, 1.15); #if CONFIG_VP9_DECODER - // Number of temporal layers > 1, so half of the frames in this SVC pattern - // will be non-reference frame and hence encoder will avoid loopfilter. - // Since frame dropper is off, we can expect 200 (half of the sequence) - // mismatched frames. - EXPECT_EQ(static_cast(200), GetMismatchFrames()); + // The non-reference frames are expected to be mismatched frames as the + // encoder will avoid loopfilter on these frames. + EXPECT_EQ(num_nonref_frames_, GetMismatchFrames()); #endif } @@ -2014,11 +2032,9 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TLDynamicBitrateChange) { CheckLayerRateTargeting(&cfg_, number_spatial_layers_, number_temporal_layers_, file_datarate_, 0.78, 1.15); #if CONFIG_VP9_DECODER - // Number of temporal layers > 1, so half of the frames in this SVC pattern - // will be non-reference frame and hence encoder will avoid loopfilter. - // Since frame dropper is off, we can expect 200 (half of the sequence) - // mismatched frames. - EXPECT_EQ(static_cast(200), GetMismatchFrames()); + // The non-reference frames are expected to be mismatched frames as the + // encoder will avoid loopfilter on these frames. + EXPECT_EQ(num_nonref_frames_, GetMismatchFrames()); #endif } @@ -2066,11 +2082,9 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL2TLDynamicPatternChange) { CheckLayerRateTargeting(&cfg_, number_spatial_layers_, number_temporal_layers_, file_datarate_, 0.78, 1.15); #if CONFIG_VP9_DECODER - // Number of temporal layers > 1, so half of the frames in this SVC pattern - // will be non-reference frame and hence encoder will avoid loopfilter. 
- // Since frame dropper is off, we can expect 200 (half of the sequence) - // mismatched frames. - EXPECT_EQ(static_cast(200), GetMismatchFrames()); + // The non-reference frames are expected to be mismatched frames as the + // encoder will avoid loopfilter on these frames. + EXPECT_EQ(num_nonref_frames_, GetMismatchFrames()); #endif } @@ -2117,6 +2131,11 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL_DisableEnableLayers) { // for part of the sequence. CheckLayerRateTargeting(&cfg_, number_spatial_layers_ - 1, number_temporal_layers_, file_datarate_, 0.78, 1.15); +#if CONFIG_VP9_DECODER + // The non-reference frames are expected to be mismatched frames as the + // encoder will avoid loopfilter on these frames. + EXPECT_EQ(num_nonref_frames_, GetMismatchFrames()); +#endif } // Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and 3 @@ -2164,12 +2183,17 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TLSmallKf) { CheckLayerRateTargeting(&cfg_, number_spatial_layers_, number_temporal_layers_, file_datarate_, 0.78, 1.15); +#if CONFIG_VP9_DECODER + // The non-reference frames are expected to be mismatched frames as the + // encoder will avoid loopfilter on these frames. + EXPECT_EQ(num_nonref_frames_, GetMismatchFrames()); +#endif } } // Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and // 3 temporal layers. Run HD clip with 4 threads. 
-TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL4threads) { +TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL4Threads) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; @@ -2198,21 +2222,21 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SL3TL4threads) { ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); top_sl_width_ = 1280; top_sl_height_ = 720; - cfg_.rc_target_bitrate = 800; - ResetModel(); - assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers, - cfg_.ts_number_layers, cfg_.temporal_layering_mode, - layer_target_avg_bandwidth_, bits_in_buffer_model_); - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - CheckLayerRateTargeting(&cfg_, number_spatial_layers_, - number_temporal_layers_, file_datarate_, 0.78, 1.15); + for (int i = 200; i <= 800; i += 300) { + cfg_.rc_target_bitrate = i; + ResetModel(); + assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers, + cfg_.ts_number_layers, cfg_.temporal_layering_mode, + layer_target_avg_bandwidth_, bits_in_buffer_model_); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + CheckLayerRateTargeting(&cfg_, number_spatial_layers_, + number_temporal_layers_, file_datarate_, 0.75, 1.2); #if CONFIG_VP9_DECODER - // Number of temporal layers > 1, so half of the frames in this SVC pattern - // will be non-reference frame and hence encoder will avoid loopfilter. - // Since frame dropper is off, we can expect 30 (half of the sequence) - // mismatched frames. - EXPECT_EQ(static_cast(30), GetMismatchFrames()); + // The non-reference frames are expected to be mismatched frames as the + // encoder will avoid loopfilter on these frames. 
+ EXPECT_EQ(num_nonref_frames_, GetMismatchFrames()); #endif + } } // Run SVC encoder for 1 temporal layer, 2 spatial layers, with spatial @@ -2258,7 +2282,11 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL1TL5x5MultipleRuns) { ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); CheckLayerRateTargeting(&cfg_, number_spatial_layers_, number_temporal_layers_, file_datarate_, 0.78, 1.15); - EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); +#if CONFIG_VP9_DECODER + // The non-reference frames are expected to be mismatched frames as the + // encoder will avoid loopfilter on these frames. + EXPECT_EQ(num_nonref_frames_, GetMismatchFrames()); +#endif } VP8_INSTANTIATE_TEST_CASE(DatarateTestLarge, ALL_TEST_MODES, diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c index 21418fde6..2bdc2b34a 100644 --- a/vp8/vp8_cx_iface.c +++ b/vp8/vp8_cx_iface.c @@ -917,7 +917,7 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, pkt.data.frame.flags = lib_flags << 16; pkt.data.frame.width[0] = cpi->common.Width; pkt.data.frame.height[0] = cpi->common.Height; - pkt.data.frame.last_spatial_layer_encoded = 0; + pkt.data.frame.spatial_layer_encoded[0] = 1; if (lib_flags & FRAMEFLAGS_KEY) { pkt.data.frame.flags |= VPX_FRAME_IS_KEY; diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 6fca23074..128574adf 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -3751,6 +3751,24 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, suppress_active_map(cpi); + // For SVC on non-zero spatial layer: if the previous spatial layer + // was dropped then disable the prediction from this (scaled) reference.
+ if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 && + cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id - 1]) { + MV_REFERENCE_FRAME ref_frame; + static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, + VP9_ALT_FLAG }; + for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { + const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); + if (yv12 != NULL && (cpi->ref_frame_flags & flag_list[ref_frame])) { + const struct scale_factors *const scale_fac = + &cm->frame_refs[ref_frame - 1].sf; + if (vp9_is_scaled(scale_fac)) + cpi->ref_frame_flags &= (~flag_list[ref_frame]); + } + } + } + // Variance adaptive and in frame q adjustment experiments are mutually // exclusive. if (cpi->oxcf.aq_mode == VARIANCE_AQ) { @@ -4504,6 +4522,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, vp9_rc_postencode_update_drop_frame(cpi); vp9_inc_frame_in_layer(cpi); cpi->ext_refresh_frame_flags_pending = 0; + cpi->last_frame_dropped = 1; + cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1; + cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1; return; } @@ -4591,28 +4612,31 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, } // For 1 pass CBR, check if we are dropping this frame. - // For spatial layers, for now if we decide to drop current spatial - // layer then we will also drop all upper spatial layers. - // TODO(marpan): Allow for the case of dropping single layer only without - // dropping all upper layers. + // Never drop on key frame, or if base layer is key for svc.
if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR && - cm->frame_type != KEY_FRAME) { - if (vp9_rc_drop_frame(cpi) || - (is_one_pass_cbr_svc(cpi) && - cpi->svc.rc_drop_spatial_layer[cpi->svc.spatial_layer_id] == 1)) { + cm->frame_type != KEY_FRAME && + (!cpi->use_svc || + !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) { + if (vp9_rc_drop_frame(cpi)) { vp9_rc_postencode_update_drop_frame(cpi); cpi->ext_refresh_frame_flags_pending = 0; cpi->last_frame_dropped = 1; + cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1; if (cpi->use_svc) { - int i; - // If we are dropping this spatial layer, then we will drop all - // upper spatial layers. - for (i = cpi->svc.spatial_layer_id; i < cpi->svc.number_spatial_layers; - i++) - cpi->svc.rc_drop_spatial_layer[i] = 1; + cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1; vp9_inc_frame_in_layer(cpi); - if (cpi->svc.rc_drop_spatial_layer[0] == 0) - cpi->svc.skip_enhancement_layer = 1; + cpi->svc.skip_enhancement_layer = 1; + if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) { + int i; + int all_layers_drop = 1; + for (i = 0; i < cpi->svc.spatial_layer_id; i++) { + if (cpi->svc.drop_spatial_layer[i] == 0) { + all_layers_drop = 0; + break; + } + } + if (all_layers_drop == 1) cpi->svc.skip_enhancement_layer = 0; + } } return; } @@ -4632,7 +4656,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, } cpi->last_frame_dropped = 0; - cpi->svc.last_layer_encoded = cpi->svc.spatial_layer_id; + cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 0; // Disable segmentation if it decrease rate/distortion ratio if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ) diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index a34eaf869..46c917b8e 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -396,9 +396,7 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { int vp9_rc_drop_frame(VP9_COMP *cpi) { const 
VP9EncoderConfig *oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; - if (!oxcf->drop_frames_water_mark || - (is_one_pass_cbr_svc(cpi) && - cpi->svc.rc_drop_spatial_layer[cpi->svc.spatial_layer_id] == 1)) { + if (!oxcf->drop_frames_water_mark) { return 0; } else { if (rc->buffer_level < 0) { diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index c5eae6263..cfa6aa403 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -596,7 +596,8 @@ static void set_rt_speed_feature_framesize_independent( if (!cpi->last_frame_dropped && cpi->resize_state == ORIG && !cpi->external_resize && (!cpi->use_svc || - cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) { + (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1 && + !cpi->svc.last_layer_dropped[cpi->svc.number_spatial_layers - 1]))) { sf->copy_partition_flag = 1; cpi->max_copied_frame = 2; // The top temporal enhancement layer (for number of temporal layers > 1) @@ -666,6 +667,11 @@ static void set_rt_speed_feature_framesize_independent( (uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(*cpi->count_lastgolden_frame_usage)); } + // Disable adaptive_rd_thresh for row_mt for SVC with frame dropping. + // This is causing some tests to fail. + // TODO(marpan/jianj): Look into this failure and re-enable later. 
+ if (cpi->use_svc && cpi->oxcf.drop_frames_water_mark) + sf->adaptive_rd_thresh_row_mt = 0; } void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) { diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index 2b68047c5..1957bb9ec 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -38,11 +38,11 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { svc->current_superframe = 0; svc->non_reference_frame = 0; svc->skip_enhancement_layer = 0; - svc->last_layer_encoded = 0; for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1; for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { - svc->rc_drop_spatial_layer[sl] = 0; + svc->last_layer_dropped[sl] = 0; + svc->drop_spatial_layer[sl] = 0; svc->ext_frame_flags[sl] = 0; svc->ext_lst_fb_idx[sl] = 0; svc->ext_gld_fb_idx[sl] = 1; @@ -649,11 +649,12 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { } } - // Reset the drop flags for all spatial lauyers, on the base layer. + // Reset the drop flags for all spatial layers, on the base layer. if (cpi->svc.spatial_layer_id == 0) { int i; - for (i = 0; i < cpi->svc.number_spatial_layers; i++) - cpi->svc.rc_drop_spatial_layer[i] = 0; + for (i = 0; i < cpi->svc.number_spatial_layers; i++) { + cpi->svc.drop_spatial_layer[i] = 0; + } } lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id * @@ -702,6 +703,13 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { break; } } + // For non-zero spatial layers: if the previous spatial layer was dropped + // disable the base_mv and partition_reuse features. 
+ if (cpi->svc.spatial_layer_id > 0 && + cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id - 1]) { + cpi->svc.use_base_mv = 0; + cpi->svc.use_partition_reuse = 0; + } } cpi->svc.non_reference_frame = 0; diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h index 0addb57d8..9bf62ee61 100644 --- a/vp9/encoder/vp9_svc_layercontext.h +++ b/vp9/encoder/vp9_svc_layercontext.h @@ -57,7 +57,6 @@ typedef struct SVC { int spatial_layer_to_encode; int first_spatial_layer_to_encode; - int rc_drop_spatial_layer[VPX_MAX_LAYERS]; // Workaround for multiple frame contexts enum { ENCODED = 0, ENCODING, NEED_TO_ENCODE } encode_empty_frame_state; @@ -107,7 +106,8 @@ typedef struct SVC { int lower_layer_qindex; - int last_layer_encoded; + int last_layer_dropped[VPX_MAX_LAYERS]; + int drop_spatial_layer[VPX_MAX_LAYERS]; } SVC; struct VP9_COMP; diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 387d6dc9a..a2c5dc83e 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -1205,8 +1205,8 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, cx_data_sz -= size; pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width; pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height; - pkt.data.frame.last_spatial_layer_encoded = - cpi->svc.last_layer_encoded; + pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] = + 1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id]; if (ctx->output_cx_pkt_cb.output_cx_pkt) { pkt.kind = VPX_CODEC_CX_FRAME_PKT; @@ -1235,7 +1235,8 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags); pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width; pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height; - pkt.data.frame.last_spatial_layer_encoded = cpi->svc.last_layer_encoded; + pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] = + 1 - 
cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id]; if (ctx->pending_cx_data) { if (size) ctx->pending_frame_sizes[ctx->pending_frame_count++] = size; diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h index dfdebe356..26eb05455 100644 --- a/vpx/vpx_encoder.h +++ b/vpx/vpx_encoder.h @@ -63,7 +63,7 @@ extern "C" { * fields to structures */ #define VPX_ENCODER_ABI_VERSION \ - (8 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/ + (9 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/ /*! \brief Encoder capabilities bitfield * @@ -181,9 +181,9 @@ typedef struct vpx_codec_cx_pkt { * first one.*/ unsigned int width[VPX_SS_MAX_LAYERS]; /**< frame width */ unsigned int height[VPX_SS_MAX_LAYERS]; /**< frame height */ - /*!\brief Last spatial layer frame in this packet. VP8 will always be set - * to 0.*/ - unsigned int last_spatial_layer_encoded; + /*!\brief Flag to indicate if spatial layer frame in this packet is + * encoded or dropped. VP8 will always be set to 1.*/ + uint8_t spatial_layer_encoded[VPX_SS_MAX_LAYERS]; } frame; /**< data for compressed frame packet */ vpx_fixed_buf_t twopass_stats; /**< data for two-pass packet */ vpx_fixed_buf_t firstpass_mb_stats; /**< first pass mb packet */