From 0dccb6277c9350c157e866c243e9c93956635e6e Mon Sep 17 00:00:00 2001 From: Marco Date: Mon, 5 Jan 2015 17:13:13 -0800 Subject: [PATCH] Modify variance partition selection for low resolutions. For low spatial resolutions: bias partittion selection to smaller block sizes, and base the variance computation on 4x4 down-sampling. Also move the threshold computations into the choose_partitioning, so they are computed once for each sb block. On low-res clips (RTC_derf) PSNR/SSIMetrics increase by about 4-5%. No change for resolutions above CIF. Change-Id: I93f8ff742c8044786977bb6e31dcf8efda6dd1b0 --- vp9/encoder/vp9_encodeframe.c | 217 +++++++++++++++++++++------------- 1 file changed, 132 insertions(+), 85 deletions(-) diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 756052771..e142a3181 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -401,55 +401,42 @@ static int set_vt_partitioning(VP9_COMP *cpi, void *data, BLOCK_SIZE bsize, int mi_row, - int mi_col) { + int mi_col, + int threshold, + BLOCK_SIZE bsize_min) { VP9_COMMON * const cm = &cpi->common; variance_node vt; const int block_width = num_8x8_blocks_wide_lookup[bsize]; const int block_height = num_8x8_blocks_high_lookup[bsize]; - // TODO(marpan): Adjust/tune these thresholds. - const int threshold_multiplier = cm->frame_type == KEY_FRAME ? 80 : 4; - int64_t threshold = - (int64_t)(threshold_multiplier * - vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth)); - int64_t threshold_bsize_ref = threshold << 6; - int64_t threshold_low = threshold; - BLOCK_SIZE bsize_ref = BLOCK_16X16; assert(block_height == block_width); tree_to_node(data, bsize, &vt); - if (cm->frame_type == KEY_FRAME) { - bsize_ref = BLOCK_8X8; - // Choose lower thresholds for key frame variance to favor split, but keep - // threshold for splitting to 4x4 block still fairly high for now. - threshold_bsize_ref = threshold << 2; - threshold_low = threshold >> 2; - } - - // For bsize=bsize_ref (16x16/8x8 for 8x8/4x4 downsampling), select if + // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if // variance is below threshold, otherwise split will be selected. // No check for vert/horiz split as too few samples for variance. - if (bsize == bsize_ref) { + if (bsize == bsize_min) { get_variance(&vt.part_variances->none); if (mi_col + block_width / 2 < cm->mi_cols && mi_row + block_height / 2 < cm->mi_rows && - vt.part_variances->none.variance < threshold_bsize_ref) { + vt.part_variances->none.variance < threshold) { set_block_size(cpi, xd, mi_row, mi_col, bsize); return 1; } return 0; - } else if (bsize > bsize_ref) { + } else if (bsize > bsize_min) { get_variance(&vt.part_variances->none); - // For key frame, for bsize above 32X32, or very high variance, take split. + // For key frame or low_res: for bsize above 32X32 or very high variance, + // take split. if (cm->frame_type == KEY_FRAME && (bsize > BLOCK_32X32 || - vt.part_variances->none.variance > (threshold << 2))) { + vt.part_variances->none.variance > (threshold << 4))) { return 0; } // If variance is low, take the bsize (no split). if (mi_col + block_width / 2 < cm->mi_cols && mi_row + block_height / 2 < cm->mi_rows && - vt.part_variances->none.variance < threshold_low) { + vt.part_variances->none.variance < threshold) { set_block_size(cpi, xd, mi_row, mi_col, bsize); return 1; } @@ -458,8 +445,8 @@ static int set_vt_partitioning(VP9_COMP *cpi, if (mi_row + block_height / 2 < cm->mi_rows) { get_variance(&vt.part_variances->vert[0]); get_variance(&vt.part_variances->vert[1]); - if (vt.part_variances->vert[0].variance < threshold_low && - vt.part_variances->vert[1].variance < threshold_low) { + if (vt.part_variances->vert[0].variance < threshold && + vt.part_variances->vert[1].variance < threshold) { BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT); set_block_size(cpi, xd, mi_row, mi_col, subsize); set_block_size(cpi, xd, mi_row, mi_col + block_width / 2, subsize); @@ -470,8 +457,8 @@ static int set_vt_partitioning(VP9_COMP *cpi, if (mi_col + block_width / 2 < cm->mi_cols) { get_variance(&vt.part_variances->horz[0]); get_variance(&vt.part_variances->horz[1]); - if (vt.part_variances->horz[0].variance < threshold_low && - vt.part_variances->horz[1].variance < threshold_low) { + if (vt.part_variances->horz[0].variance < threshold && + vt.part_variances->horz[1].variance < threshold) { BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ); set_block_size(cpi, xd, mi_row, mi_col, subsize); set_block_size(cpi, xd, mi_row + block_height / 2, mi_col, subsize); @@ -485,8 +472,7 @@ static int set_vt_partitioning(VP9_COMP *cpi, } // This function chooses partitioning based on the variance between source and -// reconstructed last, where variance is computed for downsampled inputs. -// Currently 8x8 downsampling is used for delta frames, 4x4 for key frames. +// reconstructed last, where variance is computed for downs-sampled inputs. static void choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCK *x, @@ -496,6 +482,7 @@ static void choose_partitioning(VP9_COMP *cpi, int i, j, k, m; v64x64 vt; + v16x16 vt2[16]; uint8_t *s; const uint8_t *d; int sp; @@ -504,6 +491,27 @@ static void choose_partitioning(VP9_COMP *cpi, const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); const struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf; + // Always use 4x4 partition for key frame. + int use_4x4_partition = (cm->frame_type == KEY_FRAME); + + int variance4x4downsample[16]; + int low_res = (cm->width <= 352 && cm->height <= 288) ? 1 : 0; + const int threshold_multiplier = cm->frame_type == KEY_FRAME ? 80 : 4; + int64_t threshold_base = (int64_t)(threshold_multiplier * + vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth)); + int64_t threshold = threshold_base; + int64_t threshold_bsize_min = threshold_base << 6; + int64_t threshold_bsize_max = threshold_base; + // Modify thresholds for key frame and for low-resolutions (set lower + // thresholds to favor split). + if (cm->frame_type == KEY_FRAME) { + threshold = threshold_base >> 2; + threshold_bsize_min = threshold_base << 2; + } else if (low_res) { + threshold_bsize_min = threshold_base << 3; + threshold_bsize_max = threshold_base >> 2; + } + vp9_clear_system_state(); set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); @@ -546,130 +554,169 @@ static void choose_partitioning(VP9_COMP *cpi, #endif // CONFIG_VP9_HIGHBITDEPTH } - // Fill in the entire tree of 8x8 variances for splits. + // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances + // for splits. for (i = 0; i < 4; i++) { const int x32_idx = ((i & 1) << 5); const int y32_idx = ((i >> 1) << 5); + const int i2 = i << 2; for (j = 0; j < 4; j++) { const int x16_idx = x32_idx + ((j & 1) << 4); const int y16_idx = y32_idx + ((j >> 1) << 4); v16x16 *vst = &vt.split[i].split[j]; - for (k = 0; k < 4; k++) { - int x8_idx = x16_idx + ((k & 1) << 3); - int y8_idx = y16_idx + ((k >> 1) << 3); - if (cm->frame_type != KEY_FRAME) { - unsigned int sse = 0; - int sum = 0; - if (x8_idx < pixels_wide && y8_idx < pixels_high) { - int s_avg, d_avg; + variance4x4downsample[i2 + j] = 0; + if (cm->frame_type != KEY_FRAME) { + for (k = 0; k < 4; k++) { + int x8_idx = x16_idx + ((k & 1) << 3); + int y8_idx = y16_idx + ((k >> 1) << 3); + unsigned int sse = 0; + int sum = 0; + if (x8_idx < pixels_wide && y8_idx < pixels_high) { + int s_avg, d_avg; #if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - s_avg = vp9_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp); - d_avg = vp9_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp); - } else { + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + s_avg = vp9_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp); + d_avg = vp9_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp); + } else { + s_avg = vp9_avg_8x8(s + y8_idx * sp + x8_idx, sp); + d_avg = vp9_avg_8x8(d + y8_idx * dp + x8_idx, dp); + } +#else s_avg = vp9_avg_8x8(s + y8_idx * sp + x8_idx, sp); d_avg = vp9_avg_8x8(d + y8_idx * dp + x8_idx, dp); - } -#else - s_avg = vp9_avg_8x8(s + y8_idx * sp + x8_idx, sp); - d_avg = vp9_avg_8x8(d + y8_idx * dp + x8_idx, dp); #endif - sum = s_avg - d_avg; - sse = sum * sum; - } - // If variance is based on 8x8 downsampling, we stop here and have - // one sample for 8x8 block (so use 1 for count in fill_variance), - // which of course means variance = 0 for 8x8 block. - fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); - } else { - // For key frame, go down to 4x4. - v8x8 *vst2 = &vst->split[k]; + sum = s_avg - d_avg; + sse = sum * sum; + } + // If variance is based on 8x8 downsampling, we stop here and have + // one sample for 8x8 block (so use 1 for count in fill_variance), + // which of course means variance = 0 for 8x8 block. + fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); + } + fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16); + // For low-resolution, compute the variance based on 8x8 down-sampling, + // and if it is large (above the threshold) we go down for 4x4. + // For key frame we always go down to 4x4. + if (low_res) + get_variance(&vt.split[i].split[j].part_variances.none); + } + if (cm->frame_type == KEY_FRAME || (low_res && + vt.split[i].split[j].part_variances.none.variance > + (threshold << 1))) { + // Go down to 4x4 down-sampling for variance. + variance4x4downsample[i2 + j] = 1; + for (k = 0; k < 4; k++) { + int x8_idx = x16_idx + ((k & 1) << 3); + int y8_idx = y16_idx + ((k >> 1) << 3); + v8x8 *vst2 = (cm->frame_type == KEY_FRAME) ? &vst->split[k] : + &vt2[i2 + j].split[k]; for (m = 0; m < 4; m++) { int x4_idx = x8_idx + ((m & 1) << 2); int y4_idx = y8_idx + ((m >> 1) << 2); unsigned int sse = 0; int sum = 0; if (x4_idx < pixels_wide && y4_idx < pixels_high) { + int d_avg = 128; #if CONFIG_VP9_HIGHBITDEPTH int s_avg; if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { s_avg = vp9_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp); + if (cm->frame_type != KEY_FRAME) + d_avg = vp9_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp); } else { s_avg = vp9_avg_4x4(s + y4_idx * sp + x4_idx, sp); + if (cm->frame_type != KEY_FRAME) + d_avg = vp9_avg_4x4(d + y4_idx * dp + x4_idx, dp); } #else int s_avg = vp9_avg_4x4(s + y4_idx * sp + x4_idx, sp); + if (cm->frame_type != KEY_FRAME) + d_avg = vp9_avg_4x4(d + y4_idx * dp + x4_idx, dp); #endif - // For key frame, reference is set to 128. - sum = s_avg - 128; + sum = s_avg - d_avg; sse = sum * sum; } - // If variance is based on 4x4 downsampling, we stop here and have + // If variance is based on 4x4 down-sampling, we stop here and have // one sample for 4x4 block (so use 1 for count in fill_variance), // which of course means variance = 0 for 4x4 block. - fill_variance(sse, sum, 0, &vst2->split[m].part_variances.none); + fill_variance(sse, sum, 0, &vst2->split[m].part_variances.none); } } } } } + // Fill the rest of the variance tree by summing split partition values. for (i = 0; i < 4; i++) { + const int i2 = i << 2; for (j = 0; j < 4; j++) { - if (cm->frame_type == KEY_FRAME) { + if (variance4x4downsample[i2 + j] == 1) { + v16x16 *vtemp = (cm->frame_type != KEY_FRAME) ? &vt2[i2 + j] : + &vt.split[i].split[j]; for (m = 0; m < 4; m++) { - fill_variance_tree(&vt.split[i].split[j].split[m], BLOCK_8X8); + fill_variance_tree(&vtemp->split[m], BLOCK_8X8); } + fill_variance_tree(vtemp, BLOCK_16X16); } - fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16); } fill_variance_tree(&vt.split[i], BLOCK_32X32); } fill_variance_tree(&vt, BLOCK_64X64); + // Now go through the entire structure, splitting every block size until // we get to one that's got a variance lower than our threshold. if ( mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows || - !set_vt_partitioning(cpi, xd, &vt, BLOCK_64X64, mi_row, mi_col)) { + !set_vt_partitioning(cpi, xd, &vt, BLOCK_64X64, mi_row, mi_col, + threshold_bsize_max, BLOCK_16X16)) { for (i = 0; i < 4; ++i) { const int x32_idx = ((i & 1) << 2); const int y32_idx = ((i >> 1) << 2); + const int i2 = i << 2; if (!set_vt_partitioning(cpi, xd, &vt.split[i], BLOCK_32X32, - (mi_row + y32_idx), (mi_col + x32_idx))) { + (mi_row + y32_idx), (mi_col + x32_idx), + threshold, BLOCK_16X16)) { for (j = 0; j < 4; ++j) { const int x16_idx = ((j & 1) << 1); const int y16_idx = ((j >> 1) << 1); - // Note: If 8x8 downsampling is used for variance calculation we - // cannot really select block size 8x8 (or even 8x16/16x8), since we - // don't have sufficient samples for variance. So on delta frames, - // 8x8 partition is only set if variance of the 16x16 block is very - // high. For key frames, 4x4 downsampling is used, so we can better - // select 8x16/16x8 and 8x8. 4x4 partition can potentially be set - // used here too, but for now 4x4 is not allowed. - if (!set_vt_partitioning(cpi, xd, &vt.split[i].split[j], - BLOCK_16X16, + // TODO(marpan): Allow 4x4 partitions for inter-frames. + // use_4x4_partition = (variance4x4downsample[i2 + j] == 1); + // If 4x4 partition is not used, then 8x8 partition will be selected + // if variance of 16x16 block is very high, so use larger threshold + // for 16x16 (threshold_bsize_min) in that case. + uint64_t threshold_16x16 = (use_4x4_partition) ? threshold : + threshold_bsize_min; + BLOCK_SIZE bsize_min = (use_4x4_partition) ? BLOCK_8X8 : BLOCK_16X16; + // For inter frames: if variance4x4downsample[] == 1 for this 16x16 + // block, then the variance is based on 4x4 down-sampling, so use vt2 + // in set_vt_partioning(), otherwise use vt. + v16x16 *vtemp = (cm->frame_type != KEY_FRAME && + variance4x4downsample[i2 + j] == 1) ? + &vt2[i2 + j] : &vt.split[i].split[j]; + if (!set_vt_partitioning(cpi, xd, vtemp, BLOCK_16X16, mi_row + y32_idx + y16_idx, - mi_col + x32_idx + x16_idx)) { + mi_col + x32_idx + x16_idx, + threshold_16x16, bsize_min)) { for (k = 0; k < 4; ++k) { const int x8_idx = (k & 1); const int y8_idx = (k >> 1); - if (cm->frame_type == KEY_FRAME) { - if (!set_vt_partitioning(cpi, xd, - &vt.split[i].split[j].split[k], + if (use_4x4_partition) { + if (!set_vt_partitioning(cpi, xd, &vtemp->split[k], BLOCK_8X8, mi_row + y32_idx + y16_idx + y8_idx, - mi_col + x32_idx + x16_idx + x8_idx)) { - set_block_size(cpi, xd, - (mi_row + y32_idx + y16_idx + y8_idx), - (mi_col + x32_idx + x16_idx + x8_idx), - BLOCK_4X4); + mi_col + x32_idx + x16_idx + x8_idx, + threshold_bsize_min, BLOCK_8X8)) { + set_block_size(cpi, xd, + (mi_row + y32_idx + y16_idx + y8_idx), + (mi_col + x32_idx + x16_idx + x8_idx), + BLOCK_4X4); } } else { set_block_size(cpi, xd, (mi_row + y32_idx + y16_idx + y8_idx), (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_8X8); - } + } } } } -- 2.40.0