From 1d5ca84df600ab610d14c0356b038ad4c395a6fd Mon Sep 17 00:00:00 2001
From: Jerome Jiang
Date: Mon, 19 Dec 2016 10:39:04 -0800
Subject: [PATCH] vp9: Add feature to copy partition from the last frame.

Add feature to copy partition from the last frame. The copy is only done
under certain conditions that SAD is below threshold.

Feature is currently disabled, until threshold is tuned.
Feature will be initially used for Speed 8 (ARM).

Under extreme case of always copying partition for speed 8:
Encode time is reduced by 5.4% on rtc_derf and 7.8% on rtc.
Overall PSNR reduced by 2.1 on rtc_derf and 0.968 on rtc.

Change-Id: I1bcab515af3088e4d60675758f72613c2d3dc7a5
---
Review notes: prev_partition/prev_segment_id are NULL-checked before use, and
the two new helpers are documented. Blob ids on the index lines predate this.

 vp9/encoder/vp9_encodeframe.c    | 119 ++++++++++++++++++++++++++++++-
 vp9/encoder/vp9_encoder.c        |   6 ++
 vp9/encoder/vp9_encoder.h        |   6 ++
 vp9/encoder/vp9_speed_features.c |  13 ++++
 vp9/encoder/vp9_speed_features.h |   3 +
 5 files changed, 146 insertions(+), 1 deletion(-)

diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index ac1fd8f6d..780d9855d 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -526,6 +526,7 @@ void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q) {
                                    : 1000;
       cpi->vbp_bsize_min = BLOCK_16X16;
     }
+    cpi->vbp_threshold_copy = cpi->vbp_thresholds[0] << 16;
     cpi->vbp_threshold_minmax = 15 + (q >> 3);
   }
 }
@@ -766,6 +767,100 @@ static void set_low_temp_var_flag(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
   }
 }
 
+// Writes the partitioning stored in cpi->prev_partition for the bsize block at
+// (mi_row, mi_col) into the sb_type of the current frame's mode info entries.
+// PARTITION_SPLIT recurses into the four quadrants; positions at or beyond
+// cm->mi_rows / cm->mi_cols are skipped.
+static void copy_prev_partition(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
+                                int mi_col) {
+  VP9_COMMON *const cm = &cpi->common;
+  BLOCK_SIZE *prev_part = cpi->prev_partition;
+  int start_pos = mi_row * cm->mi_stride + mi_col;
+
+  const int bsl = b_width_log2_lookup[bsize];
+  const int bs = (1 << bsl) / 4;
+  BLOCK_SIZE subsize;
+  PARTITION_TYPE partition;
+  MODE_INFO *mi = NULL;
+
+  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
+
+  partition = partition_lookup[bsl][prev_part[start_pos]];
+  subsize = get_subsize(bsize, partition);
+  mi = cm->mi_grid_visible[start_pos];
+
+  if (subsize < BLOCK_8X8) {
+    mi->sb_type = bsize;
+  } else {
+    switch (partition) {
+      case PARTITION_NONE: mi->sb_type = bsize; break;
+      case PARTITION_HORZ:
+        mi->sb_type = subsize;
+        if (mi_row + bs < cm->mi_rows)
+          cm->mi_grid_visible[(mi_row + bs) * cm->mi_stride + mi_col]->sb_type =
+              subsize;
+        break;
+      case PARTITION_VERT:
+        mi->sb_type = subsize;
+        if (mi_col + bs < cm->mi_cols)
+          cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col + bs]->sb_type =
+              subsize;
+        break;
+      case PARTITION_SPLIT:
+        copy_prev_partition(cpi, subsize, mi_row, mi_col);
+        copy_prev_partition(cpi, subsize, mi_row + bs, mi_col);
+        copy_prev_partition(cpi, subsize, mi_row, mi_col + bs);
+        copy_prev_partition(cpi, subsize, mi_row + bs, mi_col + bs);
+        break;
+      default: assert(0);
+    }
+  }
+}
+
+// Records in cpi->prev_partition the partitioning of the bsize block at
+// (mi_row, mi_col), as read from the sb_type of the current frame's mode info
+// entries. Mirrors copy_prev_partition() with source and destination swapped.
+static void update_prev_partition(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
+                                  int mi_col) {
+  VP9_COMMON *const cm = &cpi->common;
+  BLOCK_SIZE *prev_part = cpi->prev_partition;
+  int start_pos = mi_row * cm->mi_stride + mi_col;
+  const int bsl = b_width_log2_lookup[bsize];
+  const int bs = (1 << bsl) / 4;
+  BLOCK_SIZE subsize;
+  PARTITION_TYPE partition;
+  const MODE_INFO *mi = NULL;
+
+  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
+
+  mi = cm->mi_grid_visible[start_pos];
+  partition = partition_lookup[bsl][mi->sb_type];
+  subsize = get_subsize(bsize, partition);
+  if (subsize < BLOCK_8X8) {
+    prev_part[start_pos] = bsize;
+  } else {
+    switch (partition) {
+      case PARTITION_NONE: prev_part[start_pos] = bsize; break;
+      case PARTITION_HORZ:
+        prev_part[start_pos] = subsize;
+        if (mi_row + bs < cm->mi_rows)
+          prev_part[start_pos + bs * cm->mi_stride] = subsize;
+        break;
+      case PARTITION_VERT:
+        prev_part[start_pos] = subsize;
+        if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize;
+        break;
+      case PARTITION_SPLIT:
+        update_prev_partition(cpi, subsize, mi_row, mi_col);
+        update_prev_partition(cpi, subsize, mi_row + bs, mi_col);
+        update_prev_partition(cpi, subsize, mi_row, mi_col + bs);
+        update_prev_partition(cpi, subsize, mi_row + bs, mi_col + bs);
+        break;
+      default: assert(0);
+    }
+  }
+}
+
 static void chroma_check(VP9_COMP *cpi, MACROBLOCK *x, int bsize,
                          unsigned int y_sad, int is_key_frame) {
   int i;
@@ -828,6 +923,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
   const int low_res = (cm->width <= 352 && cm->height <= 288);
   int variance4x4downsample[16];
   int segment_id;
+  int offset = cm->mi_stride * mi_row + mi_col;
 
   set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
   segment_id = xd->mi[0]->segment_id;
@@ -861,7 +957,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
     YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
 
     const YV12_BUFFER_CONFIG *yv12_g = NULL;
-    unsigned int y_sad_g, y_sad_thr;
+    unsigned int y_sad_g, y_sad_thr, y_sad_last;
     bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 +
             (mi_row + 4 < cm->mi_rows);
 
@@ -901,6 +997,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
     mi->interp_filter = BILINEAR;
 
     y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col);
+    y_sad_last = y_sad;
     // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad
     // are close if short_circuit_low_temp_var is on.
     y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad;
@@ -941,6 +1038,20 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
         return 0;
       }
     }
+
+    // If the y_sad is small enough, copy the partition of the superblock in the
+    // last frame to current frame only if the last frame is not a keyframe.
+    // Both history buffers are checked before prev_segment_id is read.
+    // TODO(jianj) : tune the threshold.
+    if (cpi->sf.copy_partition_flag && cpi->prev_partition != NULL &&
+        cpi->prev_segment_id != NULL && cpi->rc.frames_since_key > 1 &&
+        segment_id == CR_SEGMENT_ID_BASE &&
+        cpi->prev_segment_id[offset] == CR_SEGMENT_ID_BASE &&
+        y_sad_last < cpi->vbp_threshold_copy) {
+      copy_prev_partition(cpi, BLOCK_64X64, mi_row, mi_col);
+      chroma_check(cpi, x, bsize, y_sad, is_key_frame);
+      return 0;
+    }
   } else {
     d = VP9_VAR_OFFS;
     dp = 0;
@@ -1135,6 +1246,12 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
     }
   }
 
+  if (cm->frame_type != KEY_FRAME && cpi->sf.copy_partition_flag &&
+      cpi->prev_partition != NULL && cpi->prev_segment_id != NULL) {
+    update_prev_partition(cpi, BLOCK_64X64, mi_row, mi_col);
+    cpi->prev_segment_id[offset] = segment_id;
+  }
+
   if (cpi->sf.short_circuit_low_temp_var) {
     set_low_temp_var_flag(cpi, x, xd, &vt, thresholds, ref_frame_partition,
                           mi_col, mi_row);
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 600ebec9c..476cb2d69 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -439,6 +439,12 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
   cpi->nmvsadcosts_hp[0] = NULL;
   cpi->nmvsadcosts_hp[1] = NULL;
 
+  vpx_free(cpi->prev_partition);
+  cpi->prev_partition = NULL;
+
+  vpx_free(cpi->prev_segment_id);
+  cpi->prev_segment_id = NULL;
+
   vp9_cyclic_refresh_free(cpi->cyclic_refresh);
   cpi->cyclic_refresh = NULL;
 
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 0007e6395..833d6a29e 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -594,6 +594,8 @@ typedef struct VP9_COMP {
   int64_t vbp_thresholds[4];
   int64_t vbp_threshold_minmax;
   int64_t vbp_threshold_sad;
+  // Threshold used for partition copy
+  int64_t vbp_threshold_copy;
   BLOCK_SIZE vbp_bsize_min;
 
   // Multi-threading
@@ -605,6 +607,10 @@ typedef struct VP9_COMP {
 
   int keep_level_stats;
   Vp9LevelInfo level_info;
+
+  // Previous Partition Info
+  BLOCK_SIZE *prev_partition;
+  int8_t *prev_segment_id;
 } VP9_COMP;
 
 void vp9_initialize_enc(void);
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index f500f2f98..f0d3bf2f4 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -311,6 +311,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed,
   sf->allow_exhaustive_searches = 0;
   sf->exhaustive_searches_thresh = INT_MAX;
   sf->allow_acl = 0;
+  sf->copy_partition_flag = 0;
 
   if (speed >= 1) {
     sf->allow_txfm_domain_distortion = 1;
@@ -496,6 +497,18 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed,
 
   if (speed >= 8) {
     sf->adaptive_rd_thresh = 4;
+    // Disabled for now until the threshold is tuned.
+    sf->copy_partition_flag = 0;
+    if (sf->copy_partition_flag) {
+      if (cpi->prev_partition == NULL) {
+        cpi->prev_partition = (BLOCK_SIZE *)vpx_calloc(
+            cm->mi_stride * cm->mi_rows, sizeof(BLOCK_SIZE));
+      }
+      if (cpi->prev_segment_id == NULL) {
+        cpi->prev_segment_id =
+            (int8_t *)vpx_calloc(cm->mi_stride * cm->mi_rows, sizeof(int8_t));
+      }
+    }
     sf->mv.subpel_force_stop = (content == VP9E_CONTENT_SCREEN) ? 3 : 2;
     if (content == VP9E_CONTENT_SCREEN) sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
     // Only keep INTRA_DC mode for speed 8.
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index a306f7aea..478684d05 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -474,6 +474,9 @@ typedef struct SPEED_FEATURES {
   // Bias to use base mv and skip 1/4 subpel search when use base mv in
   // enhancement layer.
   int base_mv_aggressive;
+
+  // Global flag to enable partition copy from the previous frame.
+  int copy_partition_flag;
 } SPEED_FEATURES;
 
 struct VP9_COMP;
-- 
2.40.0