From d4a407c051a1aa85214e7b96c1ad1e0ba41c6e9e Mon Sep 17 00:00:00 2001 From: Minghai Shang Date: Mon, 18 Aug 2014 14:51:04 -0700 Subject: [PATCH] [spatial svc] Multiple frame context feature We can use one frame context for each layer so that we don't have to reset the probs every frame. But we can't use prev_mi since we may drop enhancement layers. So we have to generate a non-VP9-compatible bitstream and modify it in the player. 1. We need to code all frames as invisible frames so that prev_mi is not used. But in the bitstream we need to code the show_frame flag as 1 so that the publisher will know it's supposed to be a visible frame. 2. In the player we need to change the show_frame flag to 0 for all frames. Then add a one-byte frame to the super frame to tell the decoder which layer we want to show. Change-Id: I75b7304cf31f0ab952f043e33c034495e88f01f3 --- test/svc_test.cc | 182 +++++++++++++++++++++++++---- vp9/encoder/vp9_bitstream.c | 11 +- vp9/encoder/vp9_encoder.c | 25 +++- vp9/encoder/vp9_svc_layercontext.c | 1 + vp9/encoder/vp9_svc_layercontext.h | 1 + vp9/vp9_cx_iface.c | 2 + vpx/src/svc_encodeframe.c | 11 +- 7 files changed, 209 insertions(+), 24 deletions(-) diff --git a/test/svc_test.cc b/test/svc_test.cc index 1cb01a407..fa95608ca 100644 --- a/test/svc_test.cc +++ b/test/svc_test.cc @@ -112,7 +112,7 @@ class SvcTest : public ::testing::Test { video.Next(); } - // Flush encoder and test EOS packet + // Flush encoder and test EOS packet.
res = vpx_svc_encode(&svc_, &codec_, NULL, video.pts(), video.duration(), VPX_DL_GOOD_QUALITY); stats_size = vpx_svc_get_rc_stats_buffer_size(&svc_); @@ -135,7 +135,7 @@ class SvcTest : public ::testing::Test { EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_)); } - outputs[*frame_received].buf = malloc(frame_size); + outputs[*frame_received].buf = malloc(frame_size + 16); ASSERT_TRUE(outputs[*frame_received].buf != NULL); memcpy(outputs[*frame_received].buf, vpx_svc_get_buffer(&svc_), frame_size); @@ -176,13 +176,13 @@ class SvcTest : public ::testing::Test { video.Next(); } - // Flush Encoder + // Flush encoder. res = vpx_svc_encode(&svc_, &codec_, NULL, 0, video.duration(), VPX_DL_GOOD_QUALITY); EXPECT_EQ(VPX_CODEC_OK, res); StoreFrames(n, outputs, &frame_received); - EXPECT_EQ(frame_received, (size_t)n); + EXPECT_EQ(frame_received, static_cast<size_t>(n)); ReleaseEncoder(); } @@ -204,7 +204,7 @@ class SvcTest : public ::testing::Test { ++decoded_frames; DxDataIterator dec_iter = decoder_->GetDxData(); - while (dec_iter.Next()) { + while (dec_iter.Next() != NULL) { ++received_frames; } } @@ -214,7 +214,8 @@ class SvcTest : public ::testing::Test { void DropEnhancementLayers(struct vpx_fixed_buf *const inputs, const int num_super_frames, - const int remained_layers) { + const int remained_layers, + const bool is_multiple_frame_context) { ASSERT_TRUE(inputs != NULL); ASSERT_GT(num_super_frames, 0); ASSERT_GT(remained_layers, 0); @@ -236,7 +237,7 @@ class SvcTest : public ::testing::Test { uint8_t *frame_data = static_cast<uint8_t*>(inputs[i].buf); uint8_t *frame_start = frame_data; for (frame = 0; frame < frame_count; ++frame) { - // Looking for a visible frame + // Looking for a visible frame.
if (frame_data[0] & 0x02) { ++frames_found; if (frames_found == remained_layers) @@ -244,11 +245,17 @@ class SvcTest : public ::testing::Test { } frame_data += frame_sizes[frame]; } - ASSERT_LT(frame, frame_count); - if (frame == frame_count - 1) + ASSERT_LT(frame, frame_count) << "Couldn't find a visible frame. " + << "remaining_layers: " << remained_layers + << " super_frame: " << i + << " is_multiple_frame_context: " << is_multiple_frame_context; + if (frame == frame_count - 1 && !is_multiple_frame_context) continue; frame_data += frame_sizes[frame]; + // We need to add one more frame for multiple frame context. + if (is_multiple_frame_context) + ++frame; uint8_t marker = static_cast<uint8_t*>(inputs[i].buf)[inputs[i].sz - 1]; const uint32_t mag = ((marker >> 3) & 0x3) + 1; @@ -256,11 +263,37 @@ class SvcTest : public ::testing::Test { const size_t new_index_sz = 2 + mag * (frame + 1); marker &= 0x0f8; marker |= frame; + + // Copy existing frame sizes. + memmove(frame_data + (is_multiple_frame_context ? 2 : 1), + frame_start + inputs[i].sz - index_sz + 1, new_index_sz - 2); + if (is_multiple_frame_context) { + // Add a one byte frame with flag show_existing frame. + *frame_data++ = 0x88 | (remained_layers - 1); + } + // New marker. frame_data[0] = marker; - memcpy(frame_data + 1, frame_start + inputs[i].sz - index_sz + 1, - new_index_sz - 2); - frame_data[new_index_sz - 1] = marker; - inputs[i].sz = frame_data - frame_start + new_index_sz; + frame_data += (mag * (frame + 1) + 1); + + if (is_multiple_frame_context) { + // Write the frame size for the one byte frame. + frame_data -= mag; + *frame_data++ = 1; + for (uint32_t j = 1; j < mag; ++j) { + *frame_data++ = 0; + } + } + + *frame_data++ = marker; + inputs[i].sz = frame_data - frame_start; + + if (is_multiple_frame_context) { + // Change the show frame flag to 0 for all frames.
+ for (int j = 0; j < frame; ++j) { + frame_start[0] &= ~2; + frame_start += frame_sizes[j]; + } + } } } @@ -507,7 +540,7 @@ TEST_F(SvcTest, TwoPassEncode2LayersDecodeBaseLayerOnly) { vpx_fixed_buf outputs[10]; memset(&outputs[0], 0, sizeof(outputs)); Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]); - DropEnhancementLayers(&outputs[0], 10, 1); + DropEnhancementLayers(&outputs[0], 10, 1, false); DecodeNFrames(&outputs[0], 10); FreeBitstreamBuffers(&outputs[0], 10); } @@ -525,13 +558,13 @@ TEST_F(SvcTest, TwoPassEncode5LayersDecode54321Layers) { Pass2EncodeNFrames(&stats_buf, 10, 5, &outputs[0]); DecodeNFrames(&outputs[0], 10); - DropEnhancementLayers(&outputs[0], 10, 4); + DropEnhancementLayers(&outputs[0], 10, 4, false); DecodeNFrames(&outputs[0], 10); - DropEnhancementLayers(&outputs[0], 10, 3); + DropEnhancementLayers(&outputs[0], 10, 3, false); DecodeNFrames(&outputs[0], 10); - DropEnhancementLayers(&outputs[0], 10, 2); + DropEnhancementLayers(&outputs[0], 10, 2, false); DecodeNFrames(&outputs[0], 10); - DropEnhancementLayers(&outputs[0], 10, 1); + DropEnhancementLayers(&outputs[0], 10, 1, false); DecodeNFrames(&outputs[0], 10); FreeBitstreamBuffers(&outputs[0], 10); @@ -568,12 +601,121 @@ TEST_F(SvcTest, TwoPassEncode3SNRLayersDecode321Layers) { memset(&outputs[0], 0, sizeof(outputs)); Pass2EncodeNFrames(&stats_buf, 20, 3, &outputs[0]); DecodeNFrames(&outputs[0], 20); - DropEnhancementLayers(&outputs[0], 20, 2); + DropEnhancementLayers(&outputs[0], 20, 2, false); DecodeNFrames(&outputs[0], 20); - DropEnhancementLayers(&outputs[0], 20, 1); + DropEnhancementLayers(&outputs[0], 20, 1, false); DecodeNFrames(&outputs[0], 20); FreeBitstreamBuffers(&outputs[0], 20); } +TEST_F(SvcTest, SetMultipleFrameContextOption) { + svc_.spatial_layers = 5; + vpx_codec_err_t res = + vpx_svc_set_options(&svc_, "multi-frame-contexts=1"); + EXPECT_EQ(VPX_CODEC_OK, res); + res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); 
+ + svc_.spatial_layers = 2; + res = vpx_svc_set_options(&svc_, "multi-frame-contexts=1"); + InitializeEncoder(); +} + +TEST_F(SvcTest, TwoPassEncode2LayersWithMultipleFrameContext) { + // First pass encode + std::string stats_buf; + Pass1EncodeNFrames(10, 2, &stats_buf); + + // Second pass encode + codec_enc_.g_pass = VPX_RC_LAST_PASS; + codec_enc_.g_error_resilient = 0; + vpx_svc_set_options(&svc_, "auto-alt-refs=1,1 multi-frame-contexts=1"); + vpx_fixed_buf outputs[10]; + memset(&outputs[0], 0, sizeof(outputs)); + Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]); + DropEnhancementLayers(&outputs[0], 10, 2, true); + DecodeNFrames(&outputs[0], 10); + FreeBitstreamBuffers(&outputs[0], 10); +} + +TEST_F(SvcTest, TwoPassEncode2LayersWithMultipleFrameContextDecodeBaselayer) { + // First pass encode + std::string stats_buf; + Pass1EncodeNFrames(10, 2, &stats_buf); + + // Second pass encode + codec_enc_.g_pass = VPX_RC_LAST_PASS; + codec_enc_.g_error_resilient = 0; + vpx_svc_set_options(&svc_, "auto-alt-refs=1,1 multi-frame-contexts=1"); + vpx_fixed_buf outputs[10]; + memset(&outputs[0], 0, sizeof(outputs)); + Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]); + DropEnhancementLayers(&outputs[0], 10, 1, true); + DecodeNFrames(&outputs[0], 10); + FreeBitstreamBuffers(&outputs[0], 10); +} + +TEST_F(SvcTest, TwoPassEncode2SNRLayersWithMultipleFrameContext) { + // First pass encode + std::string stats_buf; + vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1"); + Pass1EncodeNFrames(10, 2, &stats_buf); + + // Second pass encode + codec_enc_.g_pass = VPX_RC_LAST_PASS; + codec_enc_.g_error_resilient = 0; + vpx_svc_set_options(&svc_, "auto-alt-refs=1,1 scale-factors=1/1,1/1 " + "multi-frame-contexts=1"); + vpx_fixed_buf outputs[10]; + memset(&outputs[0], 0, sizeof(outputs)); + Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]); + DropEnhancementLayers(&outputs[0], 10, 2, true); + DecodeNFrames(&outputs[0], 10); + FreeBitstreamBuffers(&outputs[0], 10); +} + +TEST_F(SvcTest, 
TwoPassEncode3SNRLayersWithMultipleFrameContextDecode321Layer) { + // First pass encode + std::string stats_buf; + vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1,1/1"); + Pass1EncodeNFrames(10, 3, &stats_buf); + + // Second pass encode + codec_enc_.g_pass = VPX_RC_LAST_PASS; + codec_enc_.g_error_resilient = 0; + vpx_svc_set_options(&svc_, "auto-alt-refs=1,1,1 scale-factors=1/1,1/1,1/1 " + "multi-frame-contexts=1"); + vpx_fixed_buf outputs[10]; + memset(&outputs[0], 0, sizeof(outputs)); + Pass2EncodeNFrames(&stats_buf, 10, 3, &outputs[0]); + + vpx_fixed_buf outputs_new[10]; + for (int i = 0; i < 10; ++i) { + outputs_new[i].buf = malloc(outputs[i].sz + 16); + ASSERT_TRUE(outputs_new[i].buf != NULL); + memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz); + outputs_new[i].sz = outputs[i].sz; + } + DropEnhancementLayers(&outputs_new[0], 10, 3, true); + DecodeNFrames(&outputs_new[0], 10); + + for (int i = 0; i < 10; ++i) { + memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz); + outputs_new[i].sz = outputs[i].sz; + } + DropEnhancementLayers(&outputs_new[0], 10, 2, true); + DecodeNFrames(&outputs_new[0], 10); + + for (int i = 0; i < 10; ++i) { + memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz); + outputs_new[i].sz = outputs[i].sz; + } + DropEnhancementLayers(&outputs_new[0], 10, 1, true); + DecodeNFrames(&outputs_new[0], 10); + + FreeBitstreamBuffers(&outputs[0], 10); + FreeBitstreamBuffers(&outputs_new[0], 10); +} + } // namespace diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index b0ff0fa81..c8bb49c2c 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -1081,7 +1081,16 @@ static void write_uncompressed_header(VP9_COMP *cpi, write_bitdepth_colorspace_sampling(cm, wb); write_frame_size(cm, wb); } else { - if (!cm->show_frame) + // In spatial svc if it's not error_resilient_mode then we need to code all + // visible frames as invisible. 
But we need to keep the show_frame flag so + // that the publisher could know whether it is supposed to be visible. + // So we will code the show_frame flag as it is. Then code the intra_only + // bit here. This will make the bitstream incompatible. In the player we + // will change the show_frame flag to 0, then add a one byte frame with + // show_existing_frame flag which tells the decoder which frame we want to + // show. + if (!cm->show_frame || + (is_spatial_svc(cpi) && cm->error_resilient_mode == 0)) vp9_wb_write_bit(wb, cm->intra_only); if (!cm->error_resilient_mode) diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 8d7b07b1e..798da9b87 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -2102,6 +2102,19 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cm->reset_frame_context = 2; } } + if (is_spatial_svc(cpi) && cm->error_resilient_mode == 0) { + cm->frame_context_idx = cpi->svc.spatial_layer_id; + + // The probs will be updated based on the frame type of its previous + // frame if frame_parallel_decoding_mode is 0. The type may vary for + // the frame after a key frame in base layer since we may drop enhancement + // layers. So set frame_parallel_decoding_mode to 1 in this case. + if (cpi->svc.spatial_layer_id == 0 && + cpi->svc.layer_context[0].last_frame_type == KEY_FRAME) + cm->frame_parallel_decoding_mode = 1; + else + cm->frame_parallel_decoding_mode = 0; + } // Configure experimental use of segmentation for enhanced coding of // static regions if indicated. @@ -2277,8 +2290,12 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cm->last_height = cm->height; // reset to normal state now that we are done.
- if (!cm->show_existing_frame) - cm->last_show_frame = cm->show_frame; + if (!cm->show_existing_frame) { + if (is_spatial_svc(cpi) && cm->error_resilient_mode == 0) + cm->last_show_frame = 0; + else + cm->last_show_frame = cm->show_frame; + } if (cm->show_frame) { vp9_swap_mi_and_prev_mi(cm); @@ -2289,6 +2306,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, if (cpi->use_svc) vp9_inc_frame_in_layer(&cpi->svc); } + + if (is_spatial_svc(cpi)) + cpi->svc.layer_context[cpi->svc.spatial_layer_id].last_frame_type = + cm->frame_type; } static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest, diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index bf949c456..91ac3d081 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -36,6 +36,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { int i; lc->current_video_frame_in_layer = 0; lc->layer_size = 0; + lc->last_frame_type = FRAME_TYPES; lrc->ni_av_qi = oxcf->worst_allowed_q; lrc->total_actual_bits = 0; lrc->total_target_vs_actual = 0; diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h index 801449b6f..bc306d3ec 100644 --- a/vp9/encoder/vp9_svc_layercontext.h +++ b/vp9/encoder/vp9_svc_layercontext.h @@ -28,6 +28,7 @@ typedef struct { struct vpx_fixed_buf rc_twopass_stats_in; unsigned int current_video_frame_in_layer; int is_key_frame; + FRAME_TYPE last_frame_type; vpx_svc_parameters_t svc_params_received; struct lookahead_entry *alt_ref_source; int alt_ref_idx; diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index dc89b4e9f..2d7f8e4c2 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -188,6 +188,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, if (alt_ref_sum > REF_FRAMES - cfg->ss_number_layers) ERROR("Not enough ref buffers for svc alt ref frames"); } + if (cfg->ss_number_layers > 3 && cfg->g_error_resilient == 0) + ERROR("Multiple frame contexts are not 
supported for more than 3 layers"); #endif RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS); diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c index 7828615b2..45b0dca5c 100644 --- a/vpx/src/svc_encodeframe.c +++ b/vpx/src/svc_encodeframe.c @@ -86,6 +86,7 @@ typedef struct SvcInternal { int layers; int layer; int is_keyframe; + int use_multiple_frame_contexts; FrameData *frame_list; FrameData *frame_temp; @@ -366,6 +367,7 @@ static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) { char *option_name; char *option_value; char *input_ptr; + SvcInternal *const si = get_svc_internal(svc_ctx); vpx_codec_err_t res = VPX_CODEC_OK; if (options == NULL) return VPX_CODEC_OK; @@ -393,6 +395,8 @@ static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) { } else if (strcmp("auto-alt-refs", option_name) == 0) { res = parse_auto_alt_ref(svc_ctx, option_value); if (res != VPX_CODEC_OK) break; + } else if (strcmp("multi-frame-contexts", option_name) == 0) { + si->use_multiple_frame_contexts = atoi(option_value); } else { svc_log(svc_ctx, SVC_LOG_ERROR, "invalid option: %s\n", option_name); res = VPX_CODEC_INVALID_PARAM; @@ -401,6 +405,10 @@ static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) { option_name = strtok_r(NULL, "=", &input_ptr); } free(input_string); + + if (si->use_multiple_frame_contexts && svc_ctx->spatial_layers > 3) + res = VPX_CODEC_INVALID_PARAM; + return res; } @@ -534,7 +542,8 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, enc_cfg->rc_buf_initial_sz = 500; enc_cfg->rc_buf_optimal_sz = 600; enc_cfg->rc_buf_sz = 1000; - enc_cfg->g_error_resilient = 1; + if (enc_cfg->g_error_resilient == 0 && si->use_multiple_frame_contexts == 0) + enc_cfg->g_error_resilient = 1; // Initialize codec res = vpx_codec_enc_init(codec_ctx, iface, enc_cfg, VPX_CODEC_USE_PSNR); -- 2.40.0