From a5c17a689fb9b7f04b62c084acebbad9b666efba Mon Sep 17 00:00:00 2001 From: Jerome Jiang Date: Fri, 10 Aug 2018 16:14:00 -0700 Subject: [PATCH] SVC: extend api to specify temporal id for each spatial layers. BUG=b/112294545 Change-Id: I5be230c8969d69af3ad87068fdf3834ef1af11d9 --- examples/vp9_spatial_svc_encoder.c | 122 +++++++++++++++++++++++++--- examples/vpx_temporal_svc_encoder.c | 5 +- test/svc_datarate_test.cc | 2 + test/vp9_datarate_test.cc | 2 + vp9/encoder/vp9_svc_layercontext.c | 1 + vp9/encoder/vp9_svc_layercontext.h | 2 + vp9/vp9_cx_iface.c | 7 ++ vpx/vp8cx.h | 4 +- 8 files changed, 129 insertions(+), 16 deletions(-) diff --git a/examples/vp9_spatial_svc_encoder.c b/examples/vp9_spatial_svc_encoder.c index 266ba2656..2fcc099c1 100644 --- a/examples/vp9_spatial_svc_encoder.c +++ b/examples/vp9_spatial_svc_encoder.c @@ -592,9 +592,9 @@ vpx_codec_err_t parse_superframe_index(const uint8_t *data, size_t data_sz, // bypass/flexible mode. The pattern corresponds to the pattern // VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in // non-flexible mode. -void set_frame_flags_bypass_mode(int tl, int num_spatial_layers, - int is_key_frame, - vpx_svc_ref_frame_config_t *ref_frame_config) { +static void set_frame_flags_bypass_mode_ex0( + int tl, int num_spatial_layers, int is_key_frame, + vpx_svc_ref_frame_config_t *ref_frame_config) { int sl; for (sl = 0; sl < num_spatial_layers; ++sl) ref_frame_config->update_buffer_slot[sl] = 0; @@ -672,6 +672,71 @@ void set_frame_flags_bypass_mode(int tl, int num_spatial_layers, } } +// Example pattern for 2 spatial layers and 2 temporal layers used in the +// bypass/flexible mode, except only 1 spatial layer when temporal_layer_id = 1. 
+static void set_frame_flags_bypass_mode_ex1( + int tl, int num_spatial_layers, int is_key_frame, + vpx_svc_ref_frame_config_t *ref_frame_config) { + int sl; + for (sl = 0; sl < num_spatial_layers; ++sl) + ref_frame_config->update_buffer_slot[sl] = 0; + + if (tl == 0) { + if (is_key_frame) { + ref_frame_config->lst_fb_idx[1] = 0; + ref_frame_config->gld_fb_idx[1] = 1; + } else { + ref_frame_config->lst_fb_idx[1] = 1; + ref_frame_config->gld_fb_idx[1] = 0; + } + ref_frame_config->alt_fb_idx[1] = 0; + + ref_frame_config->lst_fb_idx[0] = 0; + ref_frame_config->gld_fb_idx[0] = 0; + ref_frame_config->alt_fb_idx[0] = 0; + } + if (tl == 1) { + ref_frame_config->lst_fb_idx[0] = 0; + ref_frame_config->gld_fb_idx[0] = 1; + ref_frame_config->alt_fb_idx[0] = 2; + + ref_frame_config->lst_fb_idx[1] = 1; + ref_frame_config->gld_fb_idx[1] = 2; + ref_frame_config->alt_fb_idx[1] = 3; + } + // Set the reference and update flags. + if (tl == 0) { + // Base spatial and base temporal (sl = 0, tl = 0) + ref_frame_config->reference_last[0] = 1; + ref_frame_config->reference_golden[0] = 0; + ref_frame_config->reference_alt_ref[0] = 0; + ref_frame_config->update_buffer_slot[0] |= + 1 << ref_frame_config->lst_fb_idx[0]; + + if (is_key_frame) { + ref_frame_config->reference_last[1] = 1; + ref_frame_config->reference_golden[1] = 0; + ref_frame_config->reference_alt_ref[1] = 0; + ref_frame_config->update_buffer_slot[1] |= + 1 << ref_frame_config->gld_fb_idx[1]; + } else { + // Non-zero spatial layer. 
+ ref_frame_config->reference_last[1] = 1; + ref_frame_config->reference_golden[1] = 1; + ref_frame_config->reference_alt_ref[1] = 1; + ref_frame_config->update_buffer_slot[1] |= + 1 << ref_frame_config->lst_fb_idx[1]; + } + } + if (tl == 1) { + // Top spatial and top temporal (non-reference -- doesn't update any + // reference buffers) + ref_frame_config->reference_last[1] = 1; + ref_frame_config->reference_golden[1] = 0; + ref_frame_config->reference_alt_ref[1] = 0; + } +} + int main(int argc, const char **argv) { AppInput app_input; VpxVideoWriter *writer = NULL; @@ -704,6 +769,8 @@ int main(int argc, const char **argv) { memset(&svc_ctx, 0, sizeof(svc_ctx)); memset(&app_input, 0, sizeof(AppInput)); memset(&info, 0, sizeof(VpxVideoInfo)); + memset(&layer_id, 0, sizeof(vpx_svc_layer_id_t)); + memset(&rc, 0, sizeof(struct RateControlStats)); exec_name = argv[0]; parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg); @@ -801,6 +868,12 @@ int main(int argc, const char **argv) { while (!end_of_stream) { vpx_codec_iter_t iter = NULL; const vpx_codec_cx_pkt_t *cx_pkt; + // Example patterns for bypass/flexible mode: + // example_pattern = 0: 2 temporal layers, and spatial_layers = 1,2,3. Matches + // the fixed SVC patterns. example_pattern = 1: 2 spatial and 2 temporal + // layers, where SL0 has only TL0 and SL1 has both TL0 and TL1. This example + // uses the extended API. + int example_pattern = 1; if (frame_cnt >= app_input.frames_to_code || !vpx_img_read(&raw, infile)) { // We need one extra vpx_svc_encode call at end of stream to flush // encoder and get remaining data @@ -809,26 +882,49 @@ int main(int argc, const char **argv) { // For BYPASS/FLEXIBLE mode, set the frame flags (reference and updates) // and the buffer indices for each spatial layer of the current - // (super)frame to be encoded. The temporal layer_id for the current frame - // also needs to be set. + // (super)frame to be encoded. 
The spatial and temporal layer_id for the + // current frame also needs to be set. // TODO(marpan): Should rename the "VP9E_TEMPORAL_LAYERING_MODE_BYPASS" // mode to "VP9E_LAYERING_MODE_BYPASS". if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { layer_id.spatial_layer_id = 0; // Example for 2 temporal layers. - if (frame_cnt % 2 == 0) + if (frame_cnt % 2 == 0) { layer_id.temporal_layer_id = 0; - else + for (i = 0; i < VPX_SS_MAX_LAYERS; i++) + layer_id.temporal_layer_id_per_spatial[i] = 0; + } else { layer_id.temporal_layer_id = 1; - // Note that we only set the temporal layer_id, since we are calling - // the encode for the whole superframe. The encoder will internally loop - // over all the spatial layers for the current superframe. + for (i = 0; i < VPX_SS_MAX_LAYERS; i++) + layer_id.temporal_layer_id_per_spatial[i] = 1; + } + if (example_pattern == 1) { + // example_pattern 1 is hard-coded for 2 spatial and 2 temporal layers. + assert(svc_ctx.spatial_layers == 2); + assert(svc_ctx.temporal_layers == 2); + if (frame_cnt % 2 == 0) { + // Spatial layer 0 and 1 are encoded. + layer_id.temporal_layer_id_per_spatial[0] = 0; + layer_id.temporal_layer_id_per_spatial[1] = 0; + layer_id.spatial_layer_id = 0; + } else { + // Only spatial layer 1 is encoded here. + layer_id.temporal_layer_id_per_spatial[1] = 1; + layer_id.spatial_layer_id = 1; + } + } vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id); // TODO(jianj): Fix the parameter passing for "is_key_frame" in // set_frame_flags_bypass_model() for case of periodic key frames. 
- set_frame_flags_bypass_mode(layer_id.temporal_layer_id, - svc_ctx.spatial_layers, frame_cnt == 0, - &ref_frame_config); + if (example_pattern == 0) { + set_frame_flags_bypass_mode_ex0(layer_id.temporal_layer_id, + svc_ctx.spatial_layers, frame_cnt == 0, + &ref_frame_config); + } else if (example_pattern == 1) { + set_frame_flags_bypass_mode_ex1(layer_id.temporal_layer_id, + svc_ctx.spatial_layers, frame_cnt == 0, + &ref_frame_config); + } vpx_codec_control(&codec, VP9E_SET_SVC_REF_FRAME_CONFIG, &ref_frame_config); // Keep track of input frames, to account for frame drops in rate control diff --git a/examples/vpx_temporal_svc_encoder.c b/examples/vpx_temporal_svc_encoder.c index ea475fbf9..f49ef7b1d 100644 --- a/examples/vpx_temporal_svc_encoder.c +++ b/examples/vpx_temporal_svc_encoder.c @@ -592,7 +592,7 @@ int main(int argc, char **argv) { #if ROI_MAP vpx_roi_map_t roi; #endif - vpx_svc_layer_id_t layer_id = { 0, 0 }; + vpx_svc_layer_id_t layer_id; const VpxInterface *encoder = NULL; FILE *infile = NULL; struct RateControlMetrics rc; @@ -610,7 +610,7 @@ int main(int argc, char **argv) { double framerate = 30.0; zero(rc.layer_target_bitrate); - + memset(&layer_id, 0, sizeof(vpx_svc_layer_id_t)); exec_name = argv[0]; // Check usage and arguments. 
if (argc < min_args) { @@ -856,6 +856,7 @@ int main(int argc, char **argv) { layer_id.spatial_layer_id = 0; layer_id.temporal_layer_id = cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity]; + layer_id.temporal_layer_id_per_spatial[0] = layer_id.temporal_layer_id; if (strncmp(encoder->name, "vp9", 3) == 0) { vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id); } else if (strncmp(encoder->name, "vp8", 3) == 0) { diff --git a/test/svc_datarate_test.cc b/test/svc_datarate_test.cc index f1d65ce37..117014fa6 100644 --- a/test/svc_datarate_test.cc +++ b/test/svc_datarate_test.cc @@ -186,6 +186,8 @@ class DatarateOnePassCbrSvc : public ::svc_test::OnePassCbrSvc { layer_id.spatial_layer_id = 0; layer_id.temporal_layer_id = (video->frame() % 2 != 0); temporal_layer_id_ = layer_id.temporal_layer_id; + for (int i = 0; i < number_spatial_layers_; i++) + layer_id.temporal_layer_id_per_spatial[i] = temporal_layer_id_; encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id); set_frame_flags_bypass_mode(layer_id.temporal_layer_id, number_spatial_layers_, 0, &ref_frame_config); diff --git a/test/vp9_datarate_test.cc b/test/vp9_datarate_test.cc index b700f2a9c..4fa46d001 100644 --- a/test/vp9_datarate_test.cc +++ b/test/vp9_datarate_test.cc @@ -144,6 +144,8 @@ class DatarateTestVP9 : public ::libvpx_test::EncoderTest { frame_flags_ = GetFrameFlags(video->frame(), cfg_.ts_number_layers); layer_id.temporal_layer_id = SetLayerId(video->frame(), cfg_.ts_number_layers); + layer_id.temporal_layer_id_per_spatial[0] = + SetLayerId(video->frame(), cfg_.ts_number_layers); encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id); } const vpx_rational_t tb = video->timebase(); diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index 3fa25ae14..313fb6c06 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -674,6 +674,7 @@ static void set_flags_and_fb_idx_bypass_via_set_ref_frame_config( VP9_COMP *const cpi) { SVC *const svc = 
&cpi->svc; int sl = svc->spatial_layer_id = svc->spatial_layer_to_encode; + cpi->svc.temporal_layer_id = cpi->svc.temporal_layer_id_per_spatial[sl]; cpi->ext_refresh_frame_flags_pending = 1; cpi->lst_fb_idx = svc->lst_fb_idx[sl]; cpi->gld_fb_idx = svc->gld_fb_idx[sl]; diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h index b911575fc..cb2884e22 100644 --- a/vp9/encoder/vp9_svc_layercontext.h +++ b/vp9/encoder/vp9_svc_layercontext.h @@ -173,6 +173,8 @@ typedef struct SVC { uint8_t fb_idx_base[REF_FRAMES]; int use_set_ref_frame_config; + + int temporal_layer_id_per_spatial[VPX_SS_MAX_LAYERS]; } SVC; struct VP9_COMP; diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 45a846459..11ba8df08 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -1446,9 +1446,16 @@ static vpx_codec_err_t ctrl_set_svc_layer_id(vpx_codec_alg_priv_t *ctx, vpx_svc_layer_id_t *const data = va_arg(args, vpx_svc_layer_id_t *); VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi; SVC *const svc = &cpi->svc; + int sl; svc->spatial_layer_to_encode = data->spatial_layer_id; + // TODO(jianj): Deprecated to be removed. svc->temporal_layer_id = data->temporal_layer_id; + // Allow for setting temporal layer per spatial layer for superframe. + for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) { + svc->temporal_layer_id_per_spatial[sl] = + data->temporal_layer_id_per_spatial[sl]; + } // Checks on valid layer_id input. if (svc->temporal_layer_id < 0 || svc->temporal_layer_id >= (int)ctx->cfg.ts_number_layers) { diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h index 5ede91a20..6aa5dcb98 100644 --- a/vpx/vp8cx.h +++ b/vpx/vp8cx.h @@ -784,8 +784,10 @@ typedef enum { VP8_TUNE_PSNR, VP8_TUNE_SSIM } vp8e_tuning; * */ typedef struct vpx_svc_layer_id { - int spatial_layer_id; /**< Spatial layer id number. */ + int spatial_layer_id; /**< First spatial layer to start encoding. */ + // TODO(jianj): Deprecated, to be removed. int temporal_layer_id; /**< Temporal layer id number. 
*/ + int temporal_layer_id_per_spatial[VPX_SS_MAX_LAYERS]; /**< Temp layer id. */ } vpx_svc_layer_id_t; /*!\brief vp9 svc frame flag parameters. -- 2.40.0