From 61af8981b0e3689432781f94fd0e2f834ae33adf Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Mon, 11 Apr 2016 17:41:58 +0100 Subject: [PATCH] Extend variance based partitioning to 128x128 superblocks Change-Id: I41edf266d5540a9b070a5e65bc397dd3da210507 --- vp10/encoder/encodeframe.c | 856 +++++++++++++++-------------------- vp10/encoder/encoder.c | 7 + vp10/encoder/encoder.h | 13 +- vp10/encoder/ethread.c | 4 + vp10/encoder/variance_tree.c | 63 +++ vp10/encoder/variance_tree.h | 98 ++++ vp10/vp10cx.mk | 2 + vpx_dsp/avg.c | 39 +- vpx_dsp/variance.h | 37 +- 9 files changed, 594 insertions(+), 525 deletions(-) create mode 100644 vp10/encoder/variance_tree.c create mode 100644 vp10/encoder/variance_tree.h diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c index 88e94867e..e49e0d93d 100644 --- a/vp10/encoder/encodeframe.c +++ b/vp10/encoder/encodeframe.c @@ -49,6 +49,12 @@ #include "vp10/encoder/segmentation.h" #include "vp10/encoder/tokenize.h" +#if CONFIG_VP9_HIGHBITDEPTH +# define IF_HBD(...) __VA_ARGS__ +#else +# define IF_HBD(...) +#endif // CONFIG_VP9_HIGHBITDEPTH + static void encode_superblock(VP10_COMP *cpi, ThreadData * td, TOKENEXTRA **t, int output_enabled, int mi_row, int mi_col, BLOCK_SIZE bsize, @@ -413,234 +419,102 @@ static void set_block_size(VP10_COMP * const cpi, } } -typedef struct { - int64_t sum_square_error; - int64_t sum_error; - int log2_count; - int variance; -} var; - -typedef struct { - var none; - var horz[2]; - var vert[2]; -} partition_variance; - -typedef struct { - partition_variance part_variances; - var split[4]; -} v4x4; - -typedef struct { - partition_variance part_variances; - v4x4 split[4]; -} v8x8; - -typedef struct { - partition_variance part_variances; - v8x8 split[4]; -} v16x16; - -typedef struct { - partition_variance part_variances; - v16x16 split[4]; -} v32x32; - -typedef struct { - partition_variance part_variances; - v32x32 split[4]; -} v64x64; - -#if CONFIG_EXT_PARTITION -typedef struct { - partition_variance part_variances; - v64x64 split[4]; -} v128x128; -#endif // CONFIG_EXT_PARTITION - -typedef struct { - partition_variance *part_variances; - var *split[4]; -} variance_node; - -typedef enum { - V16X16, - V32X32, - V64X64, -#if CONFIG_EXT_PARTITION - V128X128, -#endif // CONFIG_EXT_PARTITION -} TREE_LEVEL; - -static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) { - int i; - node->part_variances = NULL; - switch (bsize) { -#if CONFIG_EXT_PARTITION - case BLOCK_128X128: { - v128x128 *vt = (v128x128 *) data; - node->part_variances = &vt->part_variances; - for (i = 0; i < 4; i++) - node->split[i] = &vt->split[i].part_variances.none; - break; - } -#endif // CONFIG_EXT_PARTITION - case BLOCK_64X64: { - v64x64 *vt = (v64x64 *) data; - node->part_variances = &vt->part_variances; - for (i = 0; i < 4; i++) - node->split[i] = &vt->split[i].part_variances.none; - break; - } - case BLOCK_32X32: { - v32x32 *vt = (v32x32 *) data; - node->part_variances = &vt->part_variances; - for (i = 0; i < 4; i++) - node->split[i] = &vt->split[i].part_variances.none; - break; - } - case BLOCK_16X16: { - v16x16 *vt = (v16x16 *) data; - node->part_variances = &vt->part_variances; - for (i = 0; i < 4; i++) - node->split[i] = &vt->split[i].part_variances.none; - break; - } - case BLOCK_8X8: { - v8x8 *vt = (v8x8 *) data; - node->part_variances = &vt->part_variances; - for (i = 0; i < 4; i++) - node->split[i] = &vt->split[i].part_variances.none; - break; - } - case BLOCK_4X4: { - v4x4 *vt = (v4x4 *) data; - node->part_variances = 
&vt->part_variances; - for (i = 0; i < 4; i++) - node->split[i] = &vt->split[i]; - break; - } - default: { - assert(0); - break; - } - } -} - -// Set variance values given sum square error, sum error, count. -static void fill_variance(int64_t s2, int64_t s, int c, var *v) { - v->sum_square_error = s2; - v->sum_error = s; - v->log2_count = c; -} - -static void get_variance(var *v) { - v->variance = (int)(256 * (v->sum_square_error - - ((v->sum_error * v->sum_error) >> v->log2_count)) >> v->log2_count); -} - -static void sum_2_variances(const var *a, const var *b, var *r) { - assert(a->log2_count == b->log2_count); - fill_variance(a->sum_square_error + b->sum_square_error, - a->sum_error + b->sum_error, a->log2_count + 1, r); -} - -static void fill_variance_tree(void *data, BLOCK_SIZE bsize) { - variance_node node; - memset(&node, 0, sizeof(node)); - tree_to_node(data, bsize, &node); - sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]); - sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]); - sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]); - sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]); - sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1], - &node.part_variances->none); -} - -static int set_vt_partitioning(VP10_COMP *cpi, +static void set_vt_partitioning(VP10_COMP *cpi, MACROBLOCK *const x, MACROBLOCKD *const xd, - void *data, - BLOCK_SIZE bsize, + VAR_TREE *vt, int mi_row, int mi_col, - int64_t threshold, - BLOCK_SIZE bsize_min, - int force_split) { + const int64_t *const threshold, + const BLOCK_SIZE *const bsize_min) { VP10_COMMON * const cm = &cpi->common; - variance_node vt; - const int block_width = num_8x8_blocks_wide_lookup[bsize]; - const int block_height = num_8x8_blocks_high_lookup[bsize]; - const int low_res = (cm->width <= 352 && cm->height <= 288); + const int hbw = num_8x8_blocks_wide_lookup[vt->bsize] / 2; + const int hbh = num_8x8_blocks_high_lookup[vt->bsize] / 2; + const int has_cols = mi_col + hbw < cm->mi_cols; + const int has_rows = mi_row + hbh < cm->mi_rows; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; - assert(block_height == block_width); - tree_to_node(data, bsize, &vt); + assert(vt->bsize >= BLOCK_8X8); - if (force_split == 1) - return 0; + assert(hbh == hbw); + + if (vt->force_split || (!has_cols && !has_rows)) + goto split; // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if // variance is below threshold, otherwise split will be selected. // No check for vert/horiz split as too few samples for variance. - if (bsize == bsize_min) { - // Variance already computed to set the force_split. 
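// (In the rewritten function, `threshold` and `bsize_min` point into
// per-level tables: entry 0 always describes the current tree level, and
// each recursion into vt->split[] passes threshold + 1 and bsize_min + 1,
// so the same code serves every level of the tree.)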
- if (low_res || cm->frame_type == KEY_FRAME) - get_variance(&vt.part_variances->none); - if (mi_col + block_width / 2 < cm->mi_cols && - mi_row + block_height / 2 < cm->mi_rows && - vt.part_variances->none.variance < threshold) { - set_block_size(cpi, x, xd, mi_row, mi_col, bsize); - return 1; + if (vt->bsize == bsize_min[0]) { + if (has_cols && has_rows && + vt->variances.none.variance < threshold[0]) { + set_block_size(cpi, x, xd, mi_row, mi_col, vt->bsize); + return; + } else { + BLOCK_SIZE subsize = get_subsize(vt->bsize, PARTITION_SPLIT); + set_block_size(cpi, x, xd, mi_row, mi_col, subsize); + if (vt->bsize > BLOCK_8X8) { + set_block_size(cpi, x, xd, mi_row, mi_col + hbw, subsize); + set_block_size(cpi, x, xd, mi_row + hbh, mi_col, subsize); + set_block_size(cpi, x, xd, mi_row + hbh, mi_col + hbw, subsize); + } + return; } - return 0; - } else if (bsize > bsize_min) { - // Variance already computed to set the force_split. - if (low_res || cm->frame_type == KEY_FRAME) - get_variance(&vt.part_variances->none); + } else if (vt->bsize > bsize_min[0]) { // For key frame: take split for bsize above 32X32 or very high variance. if (cm->frame_type == KEY_FRAME && - (bsize > BLOCK_32X32 || - vt.part_variances->none.variance > (threshold << 4))) { - return 0; + (vt->bsize > BLOCK_32X32 || + vt->variances.none.variance > (threshold[0] << 4))) { + goto split; } // If variance is low, take the bsize (no split). - if (mi_col + block_width / 2 < cm->mi_cols && - mi_row + block_height / 2 < cm->mi_rows && - vt.part_variances->none.variance < threshold) { - set_block_size(cpi, x, xd, mi_row, mi_col, bsize); - return 1; + if (has_cols && has_rows && + vt->variances.none.variance < threshold[0]) { + set_block_size(cpi, x, xd, mi_row, mi_col, vt->bsize); + return; } // Check vertical split. - if (mi_row + block_height / 2 < cm->mi_rows) { - BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT); - get_variance(&vt.part_variances->vert[0]); - get_variance(&vt.part_variances->vert[1]); - if (vt.part_variances->vert[0].variance < threshold && - vt.part_variances->vert[1].variance < threshold && + if (has_rows) { + BLOCK_SIZE subsize = get_subsize(vt->bsize, PARTITION_VERT); + if (vt->variances.vert[0].variance < threshold[0] && + vt->variances.vert[1].variance < threshold[0] && get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) { set_block_size(cpi, x, xd, mi_row, mi_col, subsize); - set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize); - return 1; + set_block_size(cpi, x, xd, mi_row, mi_col + hbw, subsize); + return; } } // Check horizontal split. 
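// (As with the vertical case above, both halves must fall below
// threshold[0] and the resulting subsize must map to a valid chroma
// block size before the split is taken.)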
- if (mi_col + block_width / 2 < cm->mi_cols) { - BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ); - get_variance(&vt.part_variances->horz[0]); - get_variance(&vt.part_variances->horz[1]); - if (vt.part_variances->horz[0].variance < threshold && - vt.part_variances->horz[1].variance < threshold && + if (has_cols) { + BLOCK_SIZE subsize = get_subsize(vt->bsize, PARTITION_HORZ); + if (vt->variances.horz[0].variance < threshold[0] && + vt->variances.horz[1].variance < threshold[0] && get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) { set_block_size(cpi, x, xd, mi_row, mi_col, subsize); - set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize); - return 1; + set_block_size(cpi, x, xd, mi_row + hbh, mi_col, subsize); + return; } } + } - return 0; +split: + { + set_vt_partitioning(cpi, x, xd, vt->split[0], + mi_row, mi_col, + threshold + 1, bsize_min + 1); + set_vt_partitioning(cpi, x, xd, vt->split[1], + mi_row, mi_col + hbw, + threshold + 1, bsize_min + 1); + set_vt_partitioning(cpi, x, xd, vt->split[2], + mi_row + hbh, mi_col, + threshold + 1, bsize_min + 1); + set_vt_partitioning(cpi, x, xd, vt->split[3], + mi_row + hbh, mi_col + hbw, + threshold + 1, bsize_min + 1); + return; } - return 0; } // Set the variance split thresholds for following the block sizes: @@ -654,23 +528,24 @@ static void set_vbp_thresholds(VP10_COMP *cpi, int64_t thresholds[], int q) { const int64_t threshold_base = (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]); if (is_key_frame) { - thresholds[0] = threshold_base; - thresholds[1] = threshold_base >> 2; + thresholds[1] = threshold_base; thresholds[2] = threshold_base >> 2; - thresholds[3] = threshold_base << 2; + thresholds[3] = threshold_base >> 2; + thresholds[4] = threshold_base << 2; } else { - thresholds[1] = threshold_base; + thresholds[2] = threshold_base; if (cm->width <= 352 && cm->height <= 288) { - thresholds[0] = threshold_base >> 2; - thresholds[2] = threshold_base << 3; + thresholds[1] = threshold_base >> 2; + thresholds[3] = threshold_base << 3; } else { - thresholds[0] = threshold_base; - thresholds[1] = (5 * threshold_base) >> 2; + thresholds[1] = threshold_base; + thresholds[2] = (5 * threshold_base) >> 2; if (cm->width >= 1920 && cm->height >= 1080) - thresholds[1] = (7 * threshold_base) >> 2; - thresholds[2] = threshold_base << cpi->oxcf.speed; + thresholds[2] = (7 * threshold_base) >> 2; + thresholds[3] = threshold_base << cpi->oxcf.speed; } } + thresholds[0] = INT64_MIN; } void vp10_set_variance_partition_thresholds(VP10_COMP *cpi, int q) { @@ -699,10 +574,10 @@ void vp10_set_variance_partition_thresholds(VP10_COMP *cpi, int q) { } // Compute the minmax over the 8x8 subblocks. -static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d, - int dp, int x16_idx, int y16_idx, +static int compute_minmax_8x8(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, #if CONFIG_VP9_HIGHBITDEPTH - int highbd_flag, + int highbd, #endif int pixels_wide, int pixels_high) { @@ -711,24 +586,26 @@ static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d, int minmax_min = 255; // Loop over the 4 8x8 subblocks. 
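// (k & 1 selects the column half and k >> 1 the row half, so k = 0..3
// visits the 8x8 subblocks at offsets (0,0), (8,0), (0,8) and (8,8)
// in raster order.)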
for (k = 0; k < 4; k++) { - int x8_idx = x16_idx + ((k & 1) << 3); - int y8_idx = y16_idx + ((k >> 1) << 3); + const int x8_idx = ((k & 1) << 3); + const int y8_idx = ((k >> 1) << 3); int min = 0; int max = 0; if (x8_idx < pixels_wide && y8_idx < pixels_high) { + const int src_offset = y8_idx * src_stride + x8_idx; + const int ref_offset = y8_idx * ref_stride + x8_idx; #if CONFIG_VP9_HIGHBITDEPTH - if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { - vpx_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp, - d + y8_idx * dp + x8_idx, dp, + if (highbd) { + vpx_highbd_minmax_8x8(src + src_offset, src_stride, + ref + ref_offset, ref_stride, &min, &max); } else { - vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, - d + y8_idx * dp + x8_idx, dp, + vpx_minmax_8x8(src + src_offset, src_stride, + ref + ref_offset, ref_stride, &min, &max); } #else - vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, - d + y8_idx * dp + x8_idx, dp, + vpx_minmax_8x8(src + src_offset, src_stride, + ref + ref_offset, ref_stride, &min, &max); #endif if ((max - min) > minmax_max) @@ -740,110 +617,252 @@ static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d, return (minmax_max - minmax_min); } -static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d, - int dp, int x8_idx, int y8_idx, v8x8 *vst, -#if CONFIG_VP9_HIGHBITDEPTH - int highbd_flag, -#endif - int pixels_wide, - int pixels_high, - int is_key_frame) { - int k; - for (k = 0; k < 4; k++) { - int x4_idx = x8_idx + ((k & 1) << 2); - int y4_idx = y8_idx + ((k >> 1) << 2); - unsigned int sse = 0; - int sum = 0; - if (x4_idx < pixels_wide && y4_idx < pixels_high) { - int s_avg; - int d_avg = 128; #if CONFIG_VP9_HIGHBITDEPTH - if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { - s_avg = vpx_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp); - if (!is_key_frame) - d_avg = vpx_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp); - } else { - s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp); - if (!is_key_frame) - d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp); - } +static INLINE int avg_4x4(const uint8_t *const src, const int stride, + const int highbd) { + if (highbd) { + return vpx_highbd_avg_4x4(src, stride); + } else { + return vpx_avg_4x4(src, stride); + } +} #else - s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp); - if (!is_key_frame) - d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp); +static INLINE int avg_4x4(const uint8_t *const src, const int stride) { + return vpx_avg_4x4(src, stride); +} #endif - sum = s_avg - d_avg; - sse = sum * sum; - } - fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); + +#if CONFIG_VP9_HIGHBITDEPTH +static INLINE int avg_8x8(const uint8_t *const src, const int stride, + const int highbd) { + if (highbd) { + return vpx_highbd_avg_8x8(src, stride); + } else { + return vpx_avg_8x8(src, stride); } } +#else +static INLINE int avg_8x8(const uint8_t *const src, const int stride) { + return vpx_avg_8x8(src, stride); +} +#endif -static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d, - int dp, int x16_idx, int y16_idx, v16x16 *vst, +static void init_variance_tree(VAR_TREE *const vt, #if CONFIG_VP9_HIGHBITDEPTH - int highbd_flag, + const int highbd, #endif - int pixels_wide, - int pixels_high, - int is_key_frame) { - int k; - for (k = 0; k < 4; k++) { - int x8_idx = x16_idx + ((k & 1) << 3); - int y8_idx = y16_idx + ((k >> 1) << 3); + BLOCK_SIZE bsize, + BLOCK_SIZE leaf_size, + const int width, const int height, + const uint8_t *const src, const int src_stride, + const uint8_t *const ref, const int ref_stride) 
{ + assert(bsize >= leaf_size); + + vt->bsize = bsize; + + vt->force_split = 0; + + vt->src = src; + vt->src_stride = src_stride; + vt->ref = ref; + vt->ref_stride = ref_stride; + + vt->width = width; + vt->height = height; + +#if CONFIG_VP9_HIGHBITDEPTH + vt->highbd = highbd; +#endif // CONFIG_VP9_HIGHBITDEPTH + + if (bsize > leaf_size) { + const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT); + const int px = num_4x4_blocks_wide_lookup[subsize] * 4; + + init_variance_tree(vt->split[0], +#if CONFIG_VP9_HIGHBITDEPTH + highbd, +#endif // CONFIG_VP9_HIGHBITDEPTH + subsize, leaf_size, + VPXMIN(px, width), VPXMIN(px, height), + src, src_stride, + ref, ref_stride); + init_variance_tree(vt->split[1], +#if CONFIG_VP9_HIGHBITDEPTH + highbd, +#endif // CONFIG_VP9_HIGHBITDEPTH + subsize, leaf_size, + width - px, VPXMIN(px, height), + src + px, src_stride, + ref + px, ref_stride); + init_variance_tree(vt->split[2], +#if CONFIG_VP9_HIGHBITDEPTH + highbd, +#endif // CONFIG_VP9_HIGHBITDEPTH + subsize, leaf_size, + VPXMIN(px, width), height - px, + src + px * src_stride, src_stride, + ref + px * ref_stride, ref_stride); + init_variance_tree(vt->split[3], +#if CONFIG_VP9_HIGHBITDEPTH + highbd, +#endif // CONFIG_VP9_HIGHBITDEPTH + subsize, leaf_size, + width - px, height - px, + src + px * src_stride + px, src_stride, + ref + px * ref_stride + px, ref_stride); + } +} + + +// Fill the variance tree based on averaging pixel values (sub-sampling), at +// the leaf node size. +static void fill_variance_tree(VAR_TREE *const vt, + const BLOCK_SIZE leaf_size) { + if (vt->bsize > leaf_size) { + fill_variance_tree(vt->split[0], leaf_size); + fill_variance_tree(vt->split[1], leaf_size); + fill_variance_tree(vt->split[2], leaf_size); + fill_variance_tree(vt->split[3], leaf_size); + fill_variance_node(vt); + } else if (vt->width <= 0 || vt->height <= 0) { + fill_variance(0, 0, 0, &vt->variances.none); + } else { unsigned int sse = 0; int sum = 0; - if (x8_idx < pixels_wide && y8_idx < pixels_high) { - int s_avg; - int d_avg = 128; + int src_avg; + int ref_avg; + assert(leaf_size == BLOCK_4X4 || leaf_size == BLOCK_8X8); + if (leaf_size == BLOCK_4X4) { + src_avg = avg_4x4(vt->src, vt->src_stride IF_HBD(, vt->highbd)); + ref_avg = avg_4x4(vt->ref, vt->ref_stride IF_HBD(, vt->highbd)); + } else { + src_avg = avg_8x8(vt->src, vt->src_stride IF_HBD(, vt->highbd)); + ref_avg = avg_8x8(vt->ref, vt->ref_stride IF_HBD(, vt->highbd)); + } + sum = src_avg - ref_avg; + sse = sum * sum; + fill_variance(sse, sum, 0, &vt->variances.none); + } +} + +static void refine_variance_tree(VAR_TREE *const vt, const int64_t threshold) { + if (vt->bsize >= BLOCK_8X8) { + if (vt->bsize == BLOCK_16X16) { + if (vt->variances.none.variance <= threshold) + return; + else + vt->force_split = 0; + } + + refine_variance_tree(vt->split[0], threshold); + refine_variance_tree(vt->split[1], threshold); + refine_variance_tree(vt->split[2], threshold); + refine_variance_tree(vt->split[3], threshold); + + if (vt->bsize <= BLOCK_16X16) + fill_variance_node(vt); + } else if (vt->width <= 0 || vt->height <= 0) { + fill_variance(0, 0, 0, &vt->variances.none); + } else { + const int src_avg = avg_4x4(vt->src, vt->src_stride IF_HBD(, vt->highbd)); + const int ref_avg = avg_4x4(vt->ref, vt->ref_stride IF_HBD(, vt->highbd)); + const int sum = src_avg - ref_avg; + const unsigned int sse = sum * sum; + assert(vt->bsize == BLOCK_4X4); + fill_variance(sse, sum, 0, &vt->variances.none); + } +} + +static int check_split_key_frame(VAR_TREE *const vt, + const 
int64_t threshold) { + if (vt->bsize == BLOCK_32X32) { + vt->force_split = vt->variances.none.variance > threshold; + } else { + vt->force_split |= check_split_key_frame(vt->split[0], threshold); + vt->force_split |= check_split_key_frame(vt->split[1], threshold); + vt->force_split |= check_split_key_frame(vt->split[2], threshold); + vt->force_split |= check_split_key_frame(vt->split[3], threshold); + } + return vt->force_split; +} + +static int check_split(VP10_COMP *const cpi, + VAR_TREE *const vt, + const int segment_id, + const int64_t *const thresholds + ) { + if (vt->bsize == BLOCK_16X16) { + vt->force_split = vt->variances.none.variance > thresholds[0]; + if (!vt->force_split && + vt->variances.none.variance > thresholds[-1] && + !cyclic_refresh_segment_id_boosted(segment_id)) { + // We have some nominal amount of 16x16 variance (based on average), + // compute the minmax over the 8x8 sub-blocks, and if above threshold, + // force split to 8x8 block for this 16x16 block. + int minmax = compute_minmax_8x8(vt->src, vt->src_stride, + vt->ref, vt->ref_stride, #if CONFIG_VP9_HIGHBITDEPTH - if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { - s_avg = vpx_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp); - if (!is_key_frame) - d_avg = vpx_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp); - } else { - s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp); - if (!is_key_frame) - d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp); - } -#else - s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp); - if (!is_key_frame) - d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp); + vt->highbd, #endif - sum = s_avg - d_avg; - sse = sum * sum; + vt->width, vt->height); + vt->force_split = minmax > cpi->vbp_threshold_minmax; + } + } else { + vt->force_split |= check_split(cpi, vt->split[0], + segment_id, thresholds + 1); + vt->force_split |= check_split(cpi, vt->split[1], + segment_id, thresholds + 1); + vt->force_split |= check_split(cpi, vt->split[2], + segment_id, thresholds + 1); + vt->force_split |= check_split(cpi, vt->split[3], + segment_id, thresholds + 1); + + if (vt->bsize == BLOCK_32X32 && !vt->force_split) { + vt->force_split = vt->variances.none.variance > thresholds[0]; } - fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); } + + return vt->force_split; } // This function chooses partitioning based on the variance between source and -// reconstructed last, where variance is computed for down-sampled inputs. -static int choose_partitioning(VP10_COMP *cpi, +// reconstructed last (or golden), where variance is computed for down-sampled +// inputs. 
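// The tree is initialised to cover the whole superblock, filled bottom-up
// from 8x8 averages (4x4 for key frames and low-resolution clips), and then
// consumed top-down by set_vt_partitioning().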
+static void choose_partitioning(VP10_COMP *const cpi, + ThreadData *const td, const TileInfo *const tile, - MACROBLOCK *x, - int mi_row, int mi_col) { - VP10_COMMON * const cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - int i, j, k, m; - v64x64 vt; - v16x16 vt2[16]; - int force_split[21]; - uint8_t *s; - const uint8_t *d; - int sp; - int dp; + MACROBLOCK *const x, + const int mi_row, const int mi_col) { + VP10_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + VAR_TREE *const vt = td->var_root[cm->mib_size_log2 - MIN_MIB_SIZE_LOG2]; + int i; + const uint8_t *src; + const uint8_t *ref; + int src_stride; + int ref_stride; int pixels_wide = 8 * num_8x8_blocks_wide_lookup[cm->sb_size]; int pixels_high = 8 * num_8x8_blocks_high_lookup[cm->sb_size]; - int64_t thresholds[4] = {cpi->vbp_thresholds[0], cpi->vbp_thresholds[1], - cpi->vbp_thresholds[2], cpi->vbp_thresholds[3]}; + int64_t thresholds[5] = { + cpi->vbp_thresholds[0], + cpi->vbp_thresholds[1], + cpi->vbp_thresholds[2], + cpi->vbp_thresholds[3], + cpi->vbp_thresholds[4], + }; + BLOCK_SIZE bsize_min[5] = { + BLOCK_16X16, + BLOCK_16X16, + BLOCK_16X16, + cpi->vbp_bsize_min, + BLOCK_8X8 + }; + const int start_level = cm->sb_size == BLOCK_64X64 ? 1 : 0; + const int64_t *const thre = thresholds + start_level; + const BLOCK_SIZE *const bmin = bsize_min + start_level; - // Always use 4x4 partition for key frame. const int is_key_frame = (cm->frame_type == KEY_FRAME); - const int use_4x4_partition = is_key_frame; const int low_res = (cm->width <= 352 && cm->height <= 288); - int variance4x4downsample[16]; int segment_id = CR_SEGMENT_ID_BASE; @@ -858,11 +877,6 @@ static int choose_partitioning(VP10_COMP *cpi, } } -#if CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES - printf("Not yet implemented: choose_partitioning\n"); - exit(-1); -#endif // CONFIG_EXT_PARTITION - set_offsets(cpi, tile, x, mi_row, mi_col, cm->sb_size); if (xd->mb_to_right_edge < 0) @@ -870,33 +884,31 @@ static int choose_partitioning(VP10_COMP *cpi, if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3); - s = x->plane[0].src.buf; - sp = x->plane[0].src.stride; + src = x->plane[0].src.buf; + src_stride = x->plane[0].src.stride; if (!is_key_frame) { MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; unsigned int uv_sad; const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); - - const YV12_BUFFER_CONFIG *yv12_g = NULL; + const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); unsigned int y_sad, y_sad_g; - const int max_mi_block_size = cm->mib_size; - const int is_right_edge = mi_col + max_mi_block_size / 2 > cm->mi_cols; - const int is_left_edge = mi_row + max_mi_block_size / 2 > cm->mi_rows; + const int hbs = cm->mib_size / 2; + const int split_vert = mi_col + hbs >= cm->mi_cols; + const int split_horz = mi_row + hbs >= cm->mi_rows; BLOCK_SIZE bsize; - if (is_right_edge && is_left_edge) + if (split_vert && split_horz) bsize = get_subsize(cm->sb_size, PARTITION_SPLIT); - else if (is_right_edge) + else if (split_vert) bsize = get_subsize(cm->sb_size, PARTITION_VERT); - else if (is_left_edge) + else if (split_horz) bsize = get_subsize(cm->sb_size, PARTITION_HORZ); else bsize = cm->sb_size; assert(yv12 != NULL); - yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); if (yv12_g && yv12_g != yv12) { vp10_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, @@ -918,6 +930,7 @@ static int choose_partitioning(VP10_COMP *cpi, mbmi->interp_filter = BILINEAR; y_sad = vp10_int_pro_motion_estimation(cpi, x, bsize, mi_row, 
mi_col); + if (y_sad_g < y_sad) { vp10_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, &cm->frame_refs[GOLDEN_FRAME - 1].sf); @@ -944,196 +957,65 @@ static int choose_partitioning(VP10_COMP *cpi, x->color_sensitivity[i - 1] = uv_sad > (y_sad >> 2); } - d = xd->plane[0].dst.buf; - dp = xd->plane[0].dst.stride; + ref = xd->plane[0].dst.buf; + ref_stride = xd->plane[0].dst.stride; // If the y_sad is very small, take the largest partition and exit. // Don't check on boosted segment for now, as largest is suppressed there. if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) { - if (!is_right_edge && !is_left_edge) { + if (!split_vert && !split_horz) { set_block_size(cpi, x, xd, mi_row, mi_col, cm->sb_size); - return 0; + return; } } } else { - d = VP10_VAR_OFFS; - dp = 0; + ref = VP10_VAR_OFFS; + ref_stride = 0; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { switch (xd->bd) { case 10: - d = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_10); + ref = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_10); break; case 12: - d = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_12); + ref = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_12); break; case 8: default: - d = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_8); + ref = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_8); break; } } #endif // CONFIG_VP9_HIGHBITDEPTH } - // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks, - // 5-20 for the 16x16 blocks. - force_split[0] = 0; - // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances - // for splits. - for (i = 0; i < 4; i++) { - const int x32_idx = ((i & 1) << 5); - const int y32_idx = ((i >> 1) << 5); - const int i2 = i << 2; - force_split[i + 1] = 0; - for (j = 0; j < 4; j++) { - const int x16_idx = x32_idx + ((j & 1) << 4); - const int y16_idx = y32_idx + ((j >> 1) << 4); - const int split_index = 5 + i2 + j; - v16x16 *vst = &vt.split[i].split[j]; - force_split[split_index] = 0; - variance4x4downsample[i2 + j] = 0; - if (!is_key_frame) { - fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst, -#if CONFIG_VP9_HIGHBITDEPTH - xd->cur_buf->flags, -#endif - pixels_wide, - pixels_high, - is_key_frame); - fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16); - get_variance(&vt.split[i].split[j].part_variances.none); - if (vt.split[i].split[j].part_variances.none.variance > - thresholds[2]) { - // 16X16 variance is above threshold for split, so force split to 8x8 - // for this 16x16 block (this also forces splits for upper levels). - force_split[split_index] = 1; - force_split[i + 1] = 1; - force_split[0] = 1; - } else if (vt.split[i].split[j].part_variances.none.variance > - thresholds[1] && - !cyclic_refresh_segment_id_boosted(segment_id)) { - // We have some nominal amount of 16x16 variance (based on average), - // compute the minmax over the 8x8 sub-blocks, and if above threshold, - // force split to 8x8 block for this 16x16 block. - int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx, -#if CONFIG_VP9_HIGHBITDEPTH - xd->cur_buf->flags, -#endif - pixels_wide, pixels_high); - if (minmax > cpi->vbp_threshold_minmax) { - force_split[split_index] = 1; - force_split[i + 1] = 1; - force_split[0] = 1; - } - } - } - if (is_key_frame || (low_res && - vt.split[i].split[j].part_variances.none.variance > - (thresholds[1] << 1))) { - force_split[split_index] = 0; - // Go down to 4x4 down-sampling for variance. 
- variance4x4downsample[i2 + j] = 1; - for (k = 0; k < 4; k++) { - int x8_idx = x16_idx + ((k & 1) << 3); - int y8_idx = y16_idx + ((k >> 1) << 3); - v8x8 *vst2 = is_key_frame ? &vst->split[k] : - &vt2[i2 + j].split[k]; - fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2, + init_variance_tree(vt, #if CONFIG_VP9_HIGHBITDEPTH - xd->cur_buf->flags, -#endif - pixels_wide, - pixels_high, - is_key_frame); - } - } + xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, +#endif // CONFIG_VP9_HIGHBITDEPTH + cm->sb_size, + (is_key_frame || low_res) ? BLOCK_4X4 : BLOCK_8X8, + pixels_wide, pixels_high, + src, src_stride, ref, ref_stride); + + // Fill in the entire tree of variances and compute splits. + if (is_key_frame) { + fill_variance_tree(vt, BLOCK_4X4); + check_split_key_frame(vt, thre[1]); + } else { + fill_variance_tree(vt, BLOCK_8X8); + check_split(cpi, vt, segment_id, thre); + if (low_res) { + refine_variance_tree(vt, thre[1] << 1); } } - // Fill the rest of the variance tree by summing split partition values. - for (i = 0; i < 4; i++) { - const int i2 = i << 2; - for (j = 0; j < 4; j++) { - if (variance4x4downsample[i2 + j] == 1) { - v16x16 *vtemp = (!is_key_frame) ? &vt2[i2 + j] : - &vt.split[i].split[j]; - for (m = 0; m < 4; m++) - fill_variance_tree(&vtemp->split[m], BLOCK_8X8); - fill_variance_tree(vtemp, BLOCK_16X16); - } - } - fill_variance_tree(&vt.split[i], BLOCK_32X32); - // If variance of this 32x32 block is above the threshold, force the block - // to split. This also forces a split on the upper (64x64) level. - if (!force_split[i + 1]) { - get_variance(&vt.split[i].part_variances.none); - if (vt.split[i].part_variances.none.variance > thresholds[1]) { - force_split[i + 1] = 1; - force_split[0] = 1; - } - } - } - if (!force_split[0]) { - fill_variance_tree(&vt, BLOCK_64X64); - get_variance(&vt.part_variances.none); - } + vt->force_split |= mi_col + cm->mib_size > cm->mi_cols || + mi_row + cm->mib_size > cm->mi_rows; // Now go through the entire structure, splitting every block size until // we get to one that's got a variance lower than our threshold. - if ( mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows || - !set_vt_partitioning(cpi, x, xd, &vt, BLOCK_64X64, mi_row, mi_col, - thresholds[0], BLOCK_16X16, force_split[0])) { - for (i = 0; i < 4; ++i) { - const int x32_idx = ((i & 1) << 2); - const int y32_idx = ((i >> 1) << 2); - const int i2 = i << 2; - if (!set_vt_partitioning(cpi, x, xd, &vt.split[i], BLOCK_32X32, - (mi_row + y32_idx), (mi_col + x32_idx), - thresholds[1], BLOCK_16X16, - force_split[i + 1])) { - for (j = 0; j < 4; ++j) { - const int x16_idx = ((j & 1) << 1); - const int y16_idx = ((j >> 1) << 1); - // For inter frames: if variance4x4downsample[] == 1 for this 16x16 - // block, then the variance is based on 4x4 down-sampling, so use vt2 - // in set_vt_partioning(), otherwise use vt. - v16x16 *vtemp = (!is_key_frame && - variance4x4downsample[i2 + j] == 1) ? 
-                                     &vt2[i2 + j] : &vt.split[i].split[j];
-          if (!set_vt_partitioning(cpi, x, xd, vtemp, BLOCK_16X16,
-                                   mi_row + y32_idx + y16_idx,
-                                   mi_col + x32_idx + x16_idx,
-                                   thresholds[2],
-                                   cpi->vbp_bsize_min,
-                                   force_split[5 + i2 + j])) {
-            for (k = 0; k < 4; ++k) {
-              const int x8_idx = (k & 1);
-              const int y8_idx = (k >> 1);
-              if (use_4x4_partition) {
-                if (!set_vt_partitioning(cpi, x, xd, &vtemp->split[k],
-                                         BLOCK_8X8,
-                                         mi_row + y32_idx + y16_idx + y8_idx,
-                                         mi_col + x32_idx + x16_idx + x8_idx,
-                                         thresholds[3], BLOCK_8X8, 0)) {
-                  set_block_size(cpi, x, xd,
-                                 (mi_row + y32_idx + y16_idx + y8_idx),
-                                 (mi_col + x32_idx + x16_idx + x8_idx),
-                                 BLOCK_4X4);
-                }
-              } else {
-                set_block_size(cpi, x, xd,
-                               (mi_row + y32_idx + y16_idx + y8_idx),
-                               (mi_col + x32_idx + x16_idx + x8_idx),
-                               BLOCK_8X8);
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-  return 0;
+  set_vt_partitioning(cpi, x, xd, vt, mi_row, mi_col, thre, bmin);
 }
 
 static void update_state(VP10_COMP *cpi, ThreadData *td,
@@ -2596,10 +2478,6 @@ static void rd_use_partition(VP10_COMP *cpi,
   int chosen_rate_nocoef = INT_MAX;
 #endif
 
-#if CONFIG_EXT_PARTITION_TYPES
-  assert(0);
-#endif
-
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
 
@@ -2823,6 +2701,13 @@ static void rd_use_partition(VP10_COMP *cpi,
 #endif
       }
       break;
+#if CONFIG_EXT_PARTITION_TYPES
+    case PARTITION_VERT_A:
+    case PARTITION_VERT_B:
+    case PARTITION_HORZ_A:
+    case PARTITION_HORZ_B:
+      assert(0 && "Cannot handle extended partition types");
+#endif  // CONFIG_EXT_PARTITION_TYPES
     default:
       assert(0);
       break;
@@ -4282,9 +4167,8 @@ static void encode_rd_sb_row(VP10_COMP *cpi,
                        &dummy_rate_nocoef,
 #endif  // CONFIG_SUPERTX
                        1, pc_root);
-  } else if (sf->partition_search_type == VAR_BASED_PARTITION &&
-             cm->frame_type != KEY_FRAME) {
-    choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
+  } else if (sf->partition_search_type == VAR_BASED_PARTITION) {
+    choose_partitioning(cpi, td, tile_info, x, mi_row, mi_col);
     rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                      cm->sb_size, &dummy_rate, &dummy_dist,
 #if CONFIG_SUPERTX
@@ -4553,6 +4437,10 @@ static void encode_frame_internal(VP10_COMP *cpi) {
 #endif
 #endif
 
+  if (cpi->sf.partition_search_type == VAR_BASED_PARTITION &&
+      cpi->td.var_root[0] == NULL)
+    vp10_setup_var_tree(&cpi->common, &cpi->td);
+
   {
     struct vpx_usec_timer emr_timer;
     vpx_usec_timer_start(&emr_timer);
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index a39575b77..ea00c288f 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -463,6 +463,9 @@ static void dealloc_compressor_data(VP10_COMP *cpi) {
 
   vp10_free_pc_tree(&cpi->td);
 
+  if (cpi->sf.partition_search_type == VAR_BASED_PARTITION)
+    vp10_free_var_tree(&cpi->td);
+
   if (cpi->common.allow_screen_content_tools)
     vpx_free(cpi->td.mb.palette_buffer);
 
@@ -1999,6 +2002,8 @@ void vp10_change_config(struct VP10_COMP *cpi, const VP10EncoderConfig *oxcf) {
       CHECK_MEM_ERROR(cm, x->palette_buffer,
                       vpx_memalign(16, sizeof(*x->palette_buffer)));
     }
+    // Reallocate the pc_tree, as its contents depend on
+    // the state of cm->allow_screen_content_tools
     vp10_free_pc_tree(&cpi->td);
     vp10_setup_pc_tree(&cpi->common, &cpi->td);
   }
@@ -2586,6 +2591,8 @@ void vp10_remove_compressor(VP10_COMP *cpi) {
         vpx_free(thread_data->td->mb.palette_buffer);
         vpx_free(thread_data->td->counts);
         vp10_free_pc_tree(thread_data->td);
+        if (cpi->sf.partition_search_type == VAR_BASED_PARTITION)
+          vp10_free_var_tree(thread_data->td);
         vpx_free(thread_data->td);
       }
     }
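The encoder.h hunk below renumbers vbp_thresholds[] to make room for the extra
128x128 level. As a rough illustration, a hypothetical helper mirroring the
key-frame branch of set_vbp_thresholds() would fill the five entries like this
(threshold_base is threshold_multiplier * y_dequant[q][1], as in the patch):

/* Hypothetical sketch, not part of the patch: key-frame threshold layout
 * after the renumbering. Index 0 (128x128) is INT64_MIN so the top level
 * can never be taken whole and always splits at least once. */
#include <stdint.h>

static void vbp_thresholds_key_sketch(int64_t t[5], int64_t threshold_base) {
  t[0] = INT64_MIN;            /* 128x128 */
  t[1] = threshold_base;       /* 64x64 */
  t[2] = threshold_base >> 2;  /* 32x32 */
  t[3] = threshold_base >> 2;  /* 16x16 */
  t[4] = threshold_base << 2;  /* 8x8 */
}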
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h
index bf7815f48..701eaad7b 100644
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@@ -34,6 +34,7 @@
 #include "vp10/encoder/rd.h"
 #include "vp10/encoder/speed_features.h"
 #include "vp10/encoder/tokenize.h"
+#include "vp10/encoder/variance_tree.h"
 
 #if CONFIG_VP9_TEMPORAL_DENOISING
 #include "vp10/encoder/denoiser.h"
@@ -267,6 +268,9 @@ typedef struct ThreadData {
   PICK_MODE_CONTEXT *leaf_tree;
   PC_TREE *pc_tree;
   PC_TREE *pc_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2 + 1];
+
+  VAR_TREE *var_tree;
+  VAR_TREE *var_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2 + 1];
 } ThreadData;
 
 struct EncWorkerData;
@@ -568,9 +572,12 @@ typedef struct VP10_COMP {
   int resize_count;
 
   // VAR_BASED_PARTITION thresholds
-  // 0 - threshold_64x64; 1 - threshold_32x32;
-  // 2 - threshold_16x16; 3 - vbp_threshold_8x8;
-  int64_t vbp_thresholds[4];
+  // 0 - threshold_128x128;
+  // 1 - threshold_64x64;
+  // 2 - threshold_32x32;
+  // 3 - threshold_16x16;
+  // 4 - threshold_8x8;
+  int64_t vbp_thresholds[5];
   int64_t vbp_threshold_minmax;
   int64_t vbp_threshold_sad;
   BLOCK_SIZE vbp_bsize_min;
diff --git a/vp10/encoder/ethread.c b/vp10/encoder/ethread.c
index 2742ed2b4..e552ec54d 100644
--- a/vp10/encoder/ethread.c
+++ b/vp10/encoder/ethread.c
@@ -93,6 +93,10 @@ void vp10_encode_tiles_mt(VP10_COMP *cpi) {
       thread_data->td->pc_tree = NULL;
       vp10_setup_pc_tree(cm, thread_data->td);
 
+      // Set up variance tree if needed.
+      if (cpi->sf.partition_search_type == VAR_BASED_PARTITION)
+        vp10_setup_var_tree(cm, thread_data->td);
+
       // Allocate frame counters in thread data.
       CHECK_MEM_ERROR(cm, thread_data->td->counts,
                       vpx_calloc(1, sizeof(*thread_data->td->counts)));
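variance_tree.c below allocates the whole tree as one flat pool, leaves
first, with each level above the leaves packing four children per node:
1024 + 256 + 64 + 16 + 4 + 1 nodes under CONFIG_EXT_PARTITION. A minimal
sketch of that sizing arithmetic (hypothetical helper, not in the patch):

/* Sums leaf_nodes + leaf_nodes/4 + ... + 1, the pool size used by
 * vp10_setup_var_tree(). */
static int var_tree_node_count(int leaf_nodes) {
  int total = 0;
  int n;
  for (n = leaf_nodes; n > 0; n >>= 2)
    total += n;
  return total;  /* 1365 for 1024 leaves, 341 for 256 leaves */
}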
diff --git a/vp10/encoder/variance_tree.c b/vp10/encoder/variance_tree.c
new file mode 100644
index 000000000..d11ef2df6
--- /dev/null
+++ b/vp10/encoder/variance_tree.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp10/encoder/variance_tree.h"
+#include "vp10/encoder/encoder.h"
+
+
+
+void vp10_setup_var_tree(struct VP10Common *cm, ThreadData *td) {
+  int i, j;
+#if CONFIG_EXT_PARTITION
+  const int leaf_nodes = 1024;
+  const int tree_nodes = 1024 + 256 + 64 + 16 + 4 + 1;
+#else
+  const int leaf_nodes = 256;
+  const int tree_nodes = 256 + 64 + 16 + 4 + 1;
+#endif  // CONFIG_EXT_PARTITION
+  int index = 0;
+  VAR_TREE *this_var;
+  int nodes;
+
+  vpx_free(td->var_tree);
+  CHECK_MEM_ERROR(cm, td->var_tree, vpx_calloc(tree_nodes,
+                                               sizeof(*td->var_tree)));
+
+  this_var = &td->var_tree[0];
+
+  // Sets up all the leaf nodes in the tree.
+  for (index = 0; index < leaf_nodes; ++index) {
+    VAR_TREE *const leaf = &td->var_tree[index];
+    leaf->split[0] = NULL;
+  }
+
+  // Each node has 4 leaf nodes, fill in the child pointers
+  // from leaves to the root.
+  for (nodes = leaf_nodes >> 2; nodes > 0; nodes >>= 2) {
+    for (i = 0; i < nodes; ++i, ++index) {
+      VAR_TREE *const node = &td->var_tree[index];
+      for (j = 0; j < 4; j++)
+        node->split[j] = this_var++;
+    }
+  }
+
+  // Set up the root node for the largest superblock size
+  i = MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2;
+  td->var_root[i] = &td->var_tree[tree_nodes - 1];
+  // Set up the root nodes for the rest of the possible superblock sizes
+  while (--i >= 0) {
+    td->var_root[i] = td->var_root[i + 1]->split[0];
+  }
+}
+
+void vp10_free_var_tree(ThreadData *td) {
+  vpx_free(td->var_tree);
+  td->var_tree = NULL;
+}
diff --git a/vp10/encoder/variance_tree.h b/vp10/encoder/variance_tree.h
new file mode 100644
index 000000000..a10f7e779
--- /dev/null
+++ b/vp10/encoder/variance_tree.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_VARIANCE_TREE_H_
+#define VP10_ENCODER_VARIANCE_TREE_H_
+
+#include <assert.h>
+
+#include "./vpx_config.h"
+
+#include "vpx/vpx_integer.h"
+
+#include "vp10/common/enums.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct VP10Common;
+struct ThreadData;
+
+typedef struct {
+  int64_t sum_square_error;
+  int64_t sum_error;
+  int log2_count;
+  int variance;
+} var;
+
+typedef struct {
+  var none;
+  var horz[2];
+  var vert[2];
+} partition_variance;
+
+typedef struct VAR_TREE {
+  int force_split;
+  partition_variance variances;
+  struct VAR_TREE *split[4];
+  BLOCK_SIZE bsize;
+  const uint8_t *src;
+  const uint8_t *ref;
+  int src_stride;
+  int ref_stride;
+  int width;
+  int height;
+#if CONFIG_VP9_HIGHBITDEPTH
+  int highbd;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+} VAR_TREE;
+
+void vp10_setup_var_tree(struct VP10Common *cm, struct ThreadData *td);
+void vp10_free_var_tree(struct ThreadData *td);
+
+// Set variance values given sum square error, sum error, count.
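// Worked example (illustrative, not part of the patch): two leaves with
// average diffs 10 and 4 give a = {100, 10, 0} and b = {16, 4, 0};
// sum_2_variances() merges them into r = {116, 14, 1}, and fill_variance()
// then computes r.variance = (256 * (116 - ((14 * 14) >> 1))) >> 1 = 2304.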
+static INLINE void fill_variance(int64_t s2, int64_t s, int c, var *v) { + v->sum_square_error = s2; + v->sum_error = s; + v->log2_count = c; + v->variance = (int)(256 * (v->sum_square_error - + ((v->sum_error * v->sum_error) >> v->log2_count)) >> v->log2_count); +} + +static INLINE void sum_2_variances(const var *a, const var *b, var *r) { + assert(a->log2_count == b->log2_count); + fill_variance(a->sum_square_error + b->sum_square_error, + a->sum_error + b->sum_error, a->log2_count + 1, r); +} + +static INLINE void fill_variance_node(VAR_TREE *vt) { + sum_2_variances(&vt->split[0]->variances.none, + &vt->split[1]->variances.none, + &vt->variances.horz[0]); + sum_2_variances(&vt->split[2]->variances.none, + &vt->split[3]->variances.none, + &vt->variances.horz[1]); + sum_2_variances(&vt->split[0]->variances.none, + &vt->split[2]->variances.none, + &vt->variances.vert[0]); + sum_2_variances(&vt->split[1]->variances.none, + &vt->split[3]->variances.none, + &vt->variances.vert[1]); + sum_2_variances(&vt->variances.vert[0], + &vt->variances.vert[1], + &vt->variances.none); +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* VP10_ENCODER_VARIANCE_TREE_H_ */ diff --git a/vp10/vp10cx.mk b/vp10/vp10cx.mk index 34b766f02..d174c8bc0 100644 --- a/vp10/vp10cx.mk +++ b/vp10/vp10cx.mk @@ -21,6 +21,8 @@ VP10_CX_SRCS-yes += encoder/bitstream.c VP10_CX_SRCS-yes += encoder/bitwriter.h VP10_CX_SRCS-yes += encoder/context_tree.c VP10_CX_SRCS-yes += encoder/context_tree.h +VP10_CX_SRCS-yes += encoder/variance_tree.c +VP10_CX_SRCS-yes += encoder/variance_tree.h VP10_CX_SRCS-yes += encoder/cost.h VP10_CX_SRCS-yes += encoder/cost.c VP10_CX_SRCS-yes += encoder/dct.c diff --git a/vpx_dsp/avg.c b/vpx_dsp/avg.c index 26fe7859a..d3695a999 100644 --- a/vpx_dsp/avg.c +++ b/vpx_dsp/avg.c @@ -12,22 +12,22 @@ #include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" -unsigned int vpx_avg_8x8_c(const uint8_t *s, int p) { +unsigned int vpx_avg_8x8_c(const uint8_t *src, int stride) { int i, j; int sum = 0; - for (i = 0; i < 8; ++i, s+=p) - for (j = 0; j < 8; sum += s[j], ++j) {} + for (i = 0; i < 8; ++i, src += stride) + for (j = 0; j < 8; sum += src[j], ++j) {} - return (sum + 32) >> 6; + return ROUND_POWER_OF_TWO(sum, 6); } -unsigned int vpx_avg_4x4_c(const uint8_t *s, int p) { +unsigned int vpx_avg_4x4_c(const uint8_t *src, int stride) { int i, j; int sum = 0; - for (i = 0; i < 4; ++i, s+=p) - for (j = 0; j < 4; sum += s[j], ++j) {} + for (i = 0; i < 4; ++i, src += stride) + for (j = 0; j < 4; sum += src[j], ++j) {} - return (sum + 8) >> 4; + return ROUND_POWER_OF_TWO(sum, 4); } // src_diff: first pass, 9 bit, dynamic range [-255, 255] @@ -176,14 +176,15 @@ int vpx_vector_var_c(int16_t const *ref, int16_t const *src, return var; } -void vpx_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, +void vpx_minmax_8x8_c(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, int *min, int *max) { int i, j; *min = 255; *max = 0; - for (i = 0; i < 8; ++i, s += p, d += dp) { + for (i = 0; i < 8; ++i, src += src_stride, ref += ref_stride) { for (j = 0; j < 8; ++j) { - int diff = abs(s[j]-d[j]); + int diff = abs(src[j]-ref[j]); *min = diff < *min ? diff : *min; *max = diff > *max ? 
diff : *max; } @@ -191,24 +192,24 @@ void vpx_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, } #if CONFIG_VP9_HIGHBITDEPTH -unsigned int vpx_highbd_avg_8x8_c(const uint8_t *s8, int p) { +unsigned int vpx_highbd_avg_8x8_c(const uint8_t *src, int stride) { int i, j; int sum = 0; - const uint16_t* s = CONVERT_TO_SHORTPTR(s8); - for (i = 0; i < 8; ++i, s+=p) + const uint16_t* s = CONVERT_TO_SHORTPTR(src); + for (i = 0; i < 8; ++i, s += stride) for (j = 0; j < 8; sum += s[j], ++j) {} - return (sum + 32) >> 6; + return ROUND_POWER_OF_TWO(sum, 6); } -unsigned int vpx_highbd_avg_4x4_c(const uint8_t *s8, int p) { +unsigned int vpx_highbd_avg_4x4_c(const uint8_t *src, int stride) { int i, j; int sum = 0; - const uint16_t* s = CONVERT_TO_SHORTPTR(s8); - for (i = 0; i < 4; ++i, s+=p) + const uint16_t* s = CONVERT_TO_SHORTPTR(src); + for (i = 0; i < 4; ++i, s+=stride) for (j = 0; j < 4; sum += s[j], ++j) {} - return (sum + 8) >> 4; + return ROUND_POWER_OF_TWO(sum, 4); } void vpx_highbd_minmax_8x8_c(const uint8_t *s8, int p, const uint8_t *d8, diff --git a/vpx_dsp/variance.h b/vpx_dsp/variance.h index 4ad23f8ae..eb3e62b37 100644 --- a/vpx_dsp/variance.h +++ b/vpx_dsp/variance.h @@ -23,10 +23,10 @@ extern "C" { #define FILTER_WEIGHT 128 typedef unsigned int(*vpx_sad_fn_t)(const uint8_t *a, int a_stride, - const uint8_t *b_ptr, int b_stride); + const uint8_t *b, int b_stride); -typedef unsigned int(*vpx_sad_avg_fn_t)(const uint8_t *a_ptr, int a_stride, - const uint8_t *b_ptr, int b_stride, +typedef unsigned int(*vpx_sad_avg_fn_t)(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, const uint8_t *second_pred); typedef void (*vp8_copy32xn_fn_t)(const uint8_t *a, int a_stride, @@ -50,10 +50,10 @@ typedef unsigned int (*vpx_subpixvariance_fn_t)(const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, unsigned int *sse); -typedef unsigned int (*vpx_subp_avg_variance_fn_t)(const uint8_t *a_ptr, +typedef unsigned int (*vpx_subp_avg_variance_fn_t)(const uint8_t *a, int a_stride, int xoffset, int yoffset, - const uint8_t *b_ptr, + const uint8_t *b, int b_stride, unsigned int *sse, const uint8_t *second_pred); @@ -75,26 +75,25 @@ typedef struct variance_vtable { #endif // CONFIG_VP8 #if CONFIG_VP10 && CONFIG_EXT_INTER -typedef unsigned int(*vpx_masked_sad_fn_t)(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, +typedef unsigned int(*vpx_masked_sad_fn_t)(const uint8_t *src, + int src_stride, + const uint8_t *ref, int ref_stride, const uint8_t *msk_ptr, int msk_stride); -typedef unsigned int (*vpx_masked_variance_fn_t)(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, +typedef unsigned int (*vpx_masked_variance_fn_t)(const uint8_t *src, + int src_stride, + const uint8_t *ref, int ref_stride, - const uint8_t *msk_ptr, + const uint8_t *msk, int msk_stride, unsigned int *sse); -typedef unsigned int (*vpx_masked_subpixvariance_fn_t)(const uint8_t *src_ptr, - int source_stride, - int xoffset, - int yoffset, - const uint8_t *ref_ptr, - int Refstride, - const uint8_t *msk_ptr, +typedef unsigned int (*vpx_masked_subpixvariance_fn_t)(const uint8_t *src, + int src_stride, + int xoffset, int yoffset, + const uint8_t *ref, + int ref_stride, + const uint8_t *msk, int msk_stride, unsigned int *sse); #endif // CONFIG_VP10 && CONFIG_EXT_INTER -- 2.40.0
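The variance.h hunk above only renames parameters in the function-pointer
typedefs; the signatures are unchanged, so existing RTCD function tables still
initialise them directly. A minimal usage sketch (hypothetical buffers,
assuming the generated vpx_dsp RTCD symbols):

#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/variance.h"

/* Hypothetical example: the typedef renames are source-compatible. */
static unsigned int example_sad(const uint8_t *src, int src_stride,
                                const uint8_t *ref, int ref_stride) {
  vpx_sad_fn_t sad = vpx_sad8x8;  /* any 8x8 SAD from the RTCD table */
  return sad(src, src_stride, ref, ref_stride);
}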