From: Julia Robson Date: Thu, 5 Nov 2015 17:49:50 +0000 (+0000) Subject: Changes to use defined constants rather than hard-coded numbers X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d90a3265f0c6afe00947af6a4e219459531884a6;p=libvpx Changes to use defined constants rather than hard-coded numbers Also fixes a valgrind error when optimizations are disabled. Done in preparation for the work on the extended coding unit size experiment. Change-Id: Ib074c5a02c94ebed7dd61ff0465d26fa89834545 --- diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h index 7646b9cd5..c8926610b 100644 --- a/vp9/common/vp9_enums.h +++ b/vp9/common/vp9_enums.h @@ -118,6 +118,11 @@ typedef enum { TX_SIZES } TX_SIZE; +#define MAX_TX_SIZE_LOG2 (TX_SIZES + 1) +#define MAX_MIN_TX_IN_BLOCK_LOG2 MAX((CODING_UNIT_SIZE_LOG2 - \ + MAX_TX_SIZE_LOG2), 1) +#define MAX_MIN_TX_IN_BLOCK (1 << MAX_MIN_TX_IN_BLOCK_LOG2) + // frame transform mode typedef enum { ONLY_4X4 = 0, // only 4x4 transform used diff --git a/vp9/encoder/vp9_aq_complexity.c b/vp9/encoder/vp9_aq_complexity.c index f7fca0cde..8db65f775 100644 --- a/vp9/encoder/vp9_aq_complexity.c +++ b/vp9/encoder/vp9_aq_complexity.c @@ -100,8 +100,8 @@ void vp9_select_in_frame_q_segment(VP9_COMP *cpi, VP9_COMMON *const cm = &cpi->common; const int mi_offset = mi_row * cm->mi_cols + mi_col; - const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; - const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; + const int bw = num_8x8_blocks_wide_lookup[BLOCK_LARGEST]; + const int bh = num_8x8_blocks_high_lookup[BLOCK_LARGEST]; const int xmis = MIN(cm->mi_cols - mi_col, bw); const int ymis = MIN(cm->mi_rows - mi_row, bh); int complexity_metric = 64; diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c index 3b07d452f..32df49c30 100644 --- a/vp9/encoder/vp9_aq_cyclicrefresh.c +++ b/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -268,9 +268,9 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { bl_index = mi_row * cm->mi_cols + mi_col; // Loop through all 8x8 blocks in superblock and update map. xmis = MIN(cm->mi_cols - mi_col, - num_8x8_blocks_wide_lookup[BLOCK_64X64]); + num_8x8_blocks_wide_lookup[BLOCK_LARGEST]); ymis = MIN(cm->mi_rows - mi_row, - num_8x8_blocks_high_lookup[BLOCK_64X64]); + num_8x8_blocks_high_lookup[BLOCK_LARGEST]); for (y = 0; y < ymis; y++) { for (x = 0; x < xmis; x++) { const int bl_index2 = bl_index + y * cm->mi_cols + x; diff --git a/vp9/encoder/vp9_aq_variance.c b/vp9/encoder/vp9_aq_variance.c index 7d75f09a4..256bcf51c 100644 --- a/vp9/encoder/vp9_aq_variance.c +++ b/vp9/encoder/vp9_aq_variance.c @@ -33,9 +33,11 @@ static int segment_id[MAX_SEGMENTS] = { 5, 3, 1, 0, 2, 4, 6, 7 }; #define RDMULT_RATIO(i) rdmult_ratio[(i) - ENERGY_MIN] #define SEGMENT_ID(i) segment_id[(i) - ENERGY_MIN] -DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = {0}; +DECLARE_ALIGNED(16, static const uint8_t, + vp9_coding_unit_size_zeros[CODING_UNIT_SIZE]) = {0}; #if CONFIG_VP9_HIGHBITDEPTH -DECLARE_ALIGNED(16, static const uint16_t, vp9_highbd_64_zeros[64]) = {0}; +DECLARE_ALIGNED(16, static const uint16_t, + vp9_highbd_coding_unit_size_zeros[CODING_UNIT_SIZE]) = {0}; #endif unsigned int vp9_vaq_segment_id(int energy) { @@ -132,36 +134,34 @@ static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x, #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { highbd_variance(x->plane[0].src.buf, x->plane[0].src.stride, - CONVERT_TO_BYTEPTR(vp9_highbd_64_zeros), 0, bw, bh, - &sse, &avg); + CONVERT_TO_BYTEPTR(vp9_highbd_coding_unit_size_zeros), 0, + bw, bh, &sse, &avg); sse >>= 2 * (xd->bd - 8); avg >>= (xd->bd - 8); } else { variance(x->plane[0].src.buf, x->plane[0].src.stride, - vp9_64_zeros, 0, bw, bh, &sse, &avg); + vp9_coding_unit_size_zeros, 0, bw, bh, &sse, &avg); } #else variance(x->plane[0].src.buf, x->plane[0].src.stride, - vp9_64_zeros, 0, bw, bh, &sse, &avg); + vp9_coding_unit_size_zeros, 0, bw, bh, &sse, &avg); #endif // CONFIG_VP9_HIGHBITDEPTH var = sse - (((int64_t)avg * avg) / (bw * bh)); return (256 * var) / (bw * bh); } else { #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, - x->plane[0].src.stride, - CONVERT_TO_BYTEPTR(vp9_highbd_64_zeros), - 0, &sse); + var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride, + CONVERT_TO_BYTEPTR(vp9_highbd_coding_unit_size_zeros), 0, &sse); } else { var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride, - vp9_64_zeros, 0, &sse); + vp9_coding_unit_size_zeros, 0, &sse); } #else var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride, - vp9_64_zeros, 0, &sse); + vp9_coding_unit_size_zeros, 0, &sse); #endif // CONFIG_VP9_HIGHBITDEPTH return (256 * var) >> num_pels_log2_lookup[bs]; } diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index f9ee48596..6d7f50443 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -1563,7 +1563,7 @@ static void write_modes(VP9_COMP *cpi, #if CONFIG_SUPERTX 0, #endif - mi_row, mi_col, BLOCK_64X64); + mi_row, mi_col, BLOCK_LARGEST); } } diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index e95a5fc6e..6f3e9ed31 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -25,7 +25,7 @@ typedef struct { } diff; struct macroblock_plane { - DECLARE_ALIGNED(16, int16_t, src_diff[64 * 64]); + DECLARE_ALIGNED(16, int16_t, src_diff[CODING_UNIT_SIZE * CODING_UNIT_SIZE]); #if CONFIG_SR_MODE DECLARE_ALIGNED(16, int16_t, src_sr_diff[64 * 64]); #endif // CONFIG_SR_MODE @@ -111,10 +111,11 @@ struct macroblock { int mv_row_min; int mv_row_max; + // Second dimension of zcoeff_blk is maximum number of 4x4s in a superblock #if CONFIG_SR_MODE - uint8_t zcoeff_blk[TX_SIZES + 1][256]; + uint8_t zcoeff_blk[TX_SIZES + 1][(CODING_UNIT_SIZE * CODING_UNIT_SIZE) >> 4]; #else // CONFIG_SR_MODE - uint8_t zcoeff_blk[TX_SIZES][256]; + uint8_t zcoeff_blk[TX_SIZES][(CODING_UNIT_SIZE * CODING_UNIT_SIZE) >> 4]; #endif // CONFIG_SR_MODE int skip; @@ -139,9 +140,9 @@ struct macroblock { int quant_fp; // skip forward transform and quantization - uint8_t skip_txfm[MAX_MB_PLANE << 2]; + uint8_t skip_txfm[MAX_MB_PLANE << MAX_MIN_TX_IN_BLOCK]; - int64_t bsse[MAX_MB_PLANE << 2]; + int64_t bsse[MAX_MB_PLANE << MAX_MIN_TX_IN_BLOCK]; // Used to store sub partition's choices. MV pred_mv[MAX_REF_FRAMES]; diff --git a/vp9/encoder/vp9_context_tree.c b/vp9/encoder/vp9_context_tree.c index 9d69b4d39..de8c2c2dd 100644 --- a/vp9/encoder/vp9_context_tree.c +++ b/vp9/encoder/vp9_context_tree.c @@ -183,7 +183,7 @@ void vp9_setup_pc_tree(VP9_COMMON *cm, VP9_COMP *cpi) { // Each node has 4 leaf nodes, fill each block_size level of the tree // from leafs to the root. - for (nodes = 16; nodes > 0; nodes >>= 2) { + for (nodes = leaf_nodes >> 2; nodes > 0; nodes >>= 2) { for (i = 0; i < nodes; ++i) { PC_TREE *const tree = &cpi->pc_tree[pc_tree_index]; alloc_tree_contexts(cm, tree, 4 << (2 * square_index)); @@ -200,10 +200,11 @@ void vp9_setup_pc_tree(VP9_COMMON *cm, VP9_COMP *cpi) { void vp9_free_pc_tree(VP9_COMP *cpi) { const int tree_nodes = 64 + 16 + 4 + 1; + const int leaf_nodes = 64; int i; // Set up all 4x4 mode contexts - for (i = 0; i < 64; ++i) + for (i = 0; i < leaf_nodes; ++i) free_mode_context(&cpi->leaf_tree[i]); // Sets up all the leaf nodes in the tree. diff --git a/vp9/encoder/vp9_context_tree.h b/vp9/encoder/vp9_context_tree.h index 4fd07a00c..f39d07e57 100644 --- a/vp9/encoder/vp9_context_tree.h +++ b/vp9/encoder/vp9_context_tree.h @@ -48,7 +48,7 @@ typedef struct { // For current partition, only if all Y, U, and V transform blocks' // coefficients are quantized to 0, skippable is set to 0. int skippable; - uint8_t skip_txfm[MAX_MB_PLANE << 2]; + uint8_t skip_txfm[MAX_MB_PLANE << MAX_MIN_TX_IN_BLOCK]; int best_mode_index; int hybrid_pred_diff; int comp_pred_diff; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index eeb5f3a17..989099219 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -105,7 +105,8 @@ static void rd_supertx_sb(VP9_COMP *cpi, const TileInfo *const tile, // purposes of activity masking. // Eventually this should be replaced by custom no-reference routines, // which will be faster. -static const uint8_t VP9_VAR_OFFS[64] = { +static const uint8_t VP9_VAR_OFFS[CODING_UNIT_SIZE] = { + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, @@ -113,11 +114,11 @@ static const uint8_t VP9_VAR_OFFS[64] = { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128 }; #if CONFIG_VP9_HIGHBITDEPTH -static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = { +static const uint16_t VP9_HIGH_VAR_OFFS_8[CODING_UNIT_SIZE] = { + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, @@ -125,10 +126,10 @@ static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128 }; -static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = { +static const uint16_t VP9_HIGH_VAR_OFFS_10[CODING_UNIT_SIZE] = { + 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, @@ -136,10 +137,10 @@ static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = { 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, - 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4 }; -static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = { +static const uint16_t VP9_HIGH_VAR_OFFS_12[CODING_UNIT_SIZE] = { + 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, @@ -147,7 +148,6 @@ static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = { 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, - 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16 }; #endif // CONFIG_VP9_HIGHBITDEPTH @@ -599,7 +599,7 @@ static void choose_partitioning(VP9_COMP *cpi, vp9_clear_system_state(); vp9_zero(vt); - set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); + set_offsets(cpi, tile, mi_row, mi_col, BLOCK_LARGEST); if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3); @@ -616,13 +616,13 @@ static void choose_partitioning(VP9_COMP *cpi, #if CONFIG_INTERINTRA xd->mi[0].src_mi->mbmi.ref_frame[1] = NONE; #endif // CONFIG_INTERINTRA - xd->mi[0].src_mi->mbmi.sb_type = BLOCK_64X64; + xd->mi[0].src_mi->mbmi.sb_type = BLOCK_LARGEST; vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, xd->mi[0].src_mi->mbmi.ref_mvs[LAST_FRAME], &nearest_mv, &near_mv); xd->mi[0].src_mi->mbmi.mv[0] = nearest_mv; - vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_64X64); + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_LARGEST); d = xd->plane[0].dst.buf; dp = xd->plane[0].dst.stride; @@ -1647,9 +1647,12 @@ static void update_stats(VP9_COMMON *cm, const MACROBLOCK *x) { } static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col, - ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], - ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], - PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], + ENTROPY_CONTEXT a[(CODING_UNIT_SIZE >> 2) * + MAX_MB_PLANE], + ENTROPY_CONTEXT l[(CODING_UNIT_SIZE >> 2) * + MAX_MB_PLANE], + PARTITION_CONTEXT sa[CODING_UNIT_SIZE >> 3], + PARTITION_CONTEXT sl[CODING_UNIT_SIZE >> 3], BLOCK_SIZE bsize) { MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; @@ -1678,9 +1681,12 @@ static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col, } static void save_context(VP9_COMP *cpi, int mi_row, int mi_col, - ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], - ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], - PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], + ENTROPY_CONTEXT a[(CODING_UNIT_SIZE >> 2) * + MAX_MB_PLANE], + ENTROPY_CONTEXT l[(CODING_UNIT_SIZE >> 2) * + MAX_MB_PLANE], + PARTITION_CONTEXT sa[CODING_UNIT_SIZE >> 3], + PARTITION_CONTEXT sl[CODING_UNIT_SIZE >> 3], BLOCK_SIZE bsize) { const MACROBLOCK *const x = &cpi->mb; const MACROBLOCKD *const xd = &x->e_mbd; @@ -2018,8 +2024,9 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, int i, pl; PARTITION_TYPE partition = PARTITION_NONE; BLOCK_SIZE subsize; - ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; - PARTITION_CONTEXT sl[8], sa[8]; + ENTROPY_CONTEXT l[(CODING_UNIT_SIZE / 4) * MAX_MB_PLANE]; + ENTROPY_CONTEXT a[(CODING_UNIT_SIZE / 4) * MAX_MB_PLANE]; + PARTITION_CONTEXT sl[CODING_UNIT_SIZE >> 3], sa[CODING_UNIT_SIZE >> 3]; RD_COST last_part_rdc, none_rdc, chosen_rdc; BLOCK_SIZE sub_subsize = BLOCK_4X4; int splits_below = 0; @@ -2268,8 +2275,9 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, #if CONFIG_SUPERTX int rt_nocoef = 0; #endif - ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; - PARTITION_CONTEXT sl[8], sa[8]; + ENTROPY_CONTEXT l[(CODING_UNIT_SIZE / 4) * MAX_MB_PLANE]; + ENTROPY_CONTEXT a[(CODING_UNIT_SIZE / 4) * MAX_MB_PLANE]; + PARTITION_CONTEXT sl[CODING_UNIT_SIZE >> 3], sa[CODING_UNIT_SIZE >> 3]; if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) continue; @@ -2344,11 +2352,11 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile, // We must have chosen a partitioning and encoding or we'll fail later on. // No other opportunities for success. - if (bsize == BLOCK_64X64) + if (bsize == BLOCK_LARGEST) assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX); if (do_recon) { - int output_enabled = (bsize == BLOCK_64X64); + int output_enabled = (bsize == BLOCK_LARGEST); // Check the projected output rate for this SB against it's target // and and if necessary apply a Q delta using segmentation to get @@ -2441,14 +2449,14 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, const int col8x8_remaining = tile->mi_col_end - mi_col; int bh, bw; BLOCK_SIZE min_size = BLOCK_4X4; - BLOCK_SIZE max_size = BLOCK_64X64; + BLOCK_SIZE max_size = BLOCK_LARGEST; int i = 0; int bs_hist[BLOCK_SIZES] = {0}; // Trap case where we do not have a prediction. if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) { // Default "min to max" and "max to min" - min_size = BLOCK_64X64; + min_size = BLOCK_LARGEST; max_size = BLOCK_4X4; // NOTE: each call to get_sb_partition_size_range() uses the previous @@ -2539,7 +2547,7 @@ static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, BLOCK_SIZE bs, min_size, max_size; - min_size = BLOCK_64X64; + min_size = BLOCK_LARGEST; max_size = BLOCK_4X4; if (prev_mi) { @@ -2861,8 +2869,9 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2; - ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; - PARTITION_CONTEXT sl[8], sa[8]; + ENTROPY_CONTEXT l[(CODING_UNIT_SIZE / 4) * MAX_MB_PLANE]; + ENTROPY_CONTEXT a[(CODING_UNIT_SIZE / 4) * MAX_MB_PLANE]; + PARTITION_CONTEXT sl[CODING_UNIT_SIZE >> 3], sa[CODING_UNIT_SIZE >> 3]; TOKENEXTRA *tp_orig = *tp; PICK_MODE_CONTEXT *ctx = &pc_tree->none; int i, pl; @@ -3759,7 +3768,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && pc_tree->index != 3) { - int output_enabled = (bsize == BLOCK_64X64); + int output_enabled = (bsize == BLOCK_LARGEST); // Check the projected output rate for this SB against it's target // and and if necessary apply a Q delta using segmentation to get @@ -3774,7 +3783,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize, pc_tree); } - if (bsize == BLOCK_64X64) { + if (bsize == BLOCK_LARGEST) { assert(tp_orig < *tp); assert(best_rdc.rate < INT_MAX); assert(best_rdc.dist < INT64_MAX); @@ -3790,6 +3799,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCKD *const xd = &cpi->mb.e_mbd; SPEED_FEATURES *const sf = &cpi->sf; int mi_col; + const int leaf_nodes = 64; // Initialize the left context for the new SB row vpx_memset(&xd->left_context, 0, sizeof(xd->left_context)); @@ -3810,10 +3820,10 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, MODE_INFO *mi = cm->mi + idx_str; if (sf->adaptive_pred_interp_filter) { - for (i = 0; i < 64; ++i) + for (i = 0; i < leaf_nodes; ++i) cpi->leaf_tree[i].pred_interp_filter = SWITCHABLE; - for (i = 0; i < 64; ++i) { + for (i = 0; i < leaf_nodes; ++i) { cpi->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE; cpi->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE; cpi->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE; @@ -3826,10 +3836,10 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, cpi->mb.source_variance = UINT_MAX; if (sf->partition_search_type == FIXED_PARTITION) { - set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); + set_offsets(cpi, tile, mi_row, mi_col, BLOCK_LARGEST); set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, sf->always_this_block_size); - rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, + rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_LARGEST, &dummy_rate, &dummy_dist, #if CONFIG_SUPERTX &dummy_rate_nocoef, @@ -3837,10 +3847,10 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, 1, cpi->pc_root); } else if (cpi->partition_search_skippable_frame) { BLOCK_SIZE bsize; - set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); + set_offsets(cpi, tile, mi_row, mi_col, BLOCK_LARGEST); bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col); set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize); - rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, + rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_LARGEST, &dummy_rate, &dummy_dist, #if CONFIG_SUPERTX &dummy_rate_nocoef, @@ -3849,7 +3859,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, } else if (sf->partition_search_type == VAR_BASED_PARTITION && cm->frame_type != KEY_FRAME ) { choose_partitioning(cpi, tile, mi_row, mi_col); - rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64, + rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_LARGEST, &dummy_rate, &dummy_dist, #if CONFIG_SUPERTX &dummy_rate_nocoef, @@ -3858,12 +3868,13 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, } else { // If required set upper and lower partition size limits if (sf->auto_min_max_partition_size) { - set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); + set_offsets(cpi, tile, mi_row, mi_col, BLOCK_LARGEST); rd_auto_partition_range(cpi, tile, mi_row, mi_col, &sf->min_partition_size, &sf->max_partition_size); } - rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rdc, + rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_LARGEST, + &dummy_rdc, #if CONFIG_SUPERTX &dummy_rate_nocoef, #endif diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index d1dc556e0..8c1959fed 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -32,8 +32,8 @@ #endif struct optimize_ctx { - ENTROPY_CONTEXT ta[MAX_MB_PLANE][16]; - ENTROPY_CONTEXT tl[MAX_MB_PLANE][16]; + ENTROPY_CONTEXT ta[MAX_MB_PLANE][(CODING_UNIT_SIZE >> 2)]; + ENTROPY_CONTEXT tl[MAX_MB_PLANE][(CODING_UNIT_SIZE >> 2)]; }; struct encode_b_args { @@ -2585,7 +2585,8 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, } #endif // CONFIG_SR_MODE if (max_txsize_lookup[plane_bsize] == tx_size) { - if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) { + if (x->skip_txfm[(plane << MAX_MIN_TX_IN_BLOCK) + + (block >> (tx_size << 1))] == 0) { // full forward transform and quantization #if CONFIG_NEW_QUANT if (x->quant_fp) @@ -2598,7 +2599,8 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, else vp9_xform_quant(x, plane, block, plane_bsize, tx_size); #endif // CONFIG_NEW_QUANT - } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) { + } else if (x->skip_txfm[(plane << MAX_MIN_TX_IN_BLOCK) + + (block >> (tx_size << 1))] == 2) { // fast path forward transform and quantization #if CONFIG_NEW_QUANT if (x->quant_fp) diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 0be05ae70..7f479fb1a 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -287,13 +287,15 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { #define SETUP_CENTER_ERROR \ if (second_pred != NULL) { \ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { \ - DECLARE_ALIGNED_ARRAY(16, uint16_t, comp_pred16, 64 * 64); \ + DECLARE_ALIGNED_ARRAY(16, uint16_t, comp_pred16, CODING_UNIT_SIZE * \ + CODING_UNIT_SIZE); \ vp9_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, \ y_stride); \ besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, z, src_stride, \ sse1); \ } else { \ - DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); \ + DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, CODING_UNIT_SIZE * \ + CODING_UNIT_SIZE); \ vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); \ besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); \ } \ @@ -307,7 +309,8 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { #define SETUP_CENTER_ERROR \ if (second_pred != NULL) { \ - DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); \ + DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, CODING_UNIT_SIZE * \ + CODING_UNIT_SIZE); \ vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); \ besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); \ } else { \ diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 9b9923fb6..edbd6aabd 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -476,18 +476,18 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, var = cpi->fn_ptr[unit_size].vf(src, p->src.stride, dst, pd->dst.stride, &sse); - x->bsse[(i << 2) + block_idx] = sse; + x->bsse[(i << MAX_MIN_TX_IN_BLOCK) + block_idx] = sse; sum_sse += sse; - x->skip_txfm[(i << 2) + block_idx] = 0; + x->skip_txfm[(i << MAX_MIN_TX_IN_BLOCK) + block_idx] = 0; if (!x->select_tx_size) { // Check if all ac coefficients can be quantized to zero. if (var < ac_thr || var == 0) { - x->skip_txfm[(i << 2) + block_idx] = 2; + x->skip_txfm[(i << MAX_MIN_TX_IN_BLOCK) + block_idx] = 2; // Check if dc coefficient can be quantized to zero. if (sse - var < dc_thr || sse == var) { - x->skip_txfm[(i << 2) + block_idx] = 1; + x->skip_txfm[(i << MAX_MIN_TX_IN_BLOCK) + block_idx] = 1; if (!sse || (var < low_ac_thr && sse - var < low_dc_thr)) low_err_skip = 1; @@ -971,7 +971,8 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, } #endif // CONFIG_SR_MODE - if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) { + if (x->skip_txfm[(plane << MAX_MIN_TX_IN_BLOCK) + + (block >> (tx_size << 1))] == 0) { // full forward transform and quantization #if CONFIG_NEW_QUANT if (x->quant_fp) @@ -1004,7 +1005,8 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, #if CONFIG_SR_MODE } #endif // CONFIG_SR_MODE - } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) { + } else if (x->skip_txfm[(plane << MAX_MIN_TX_IN_BLOCK) + + (block >> (tx_size << 1))] == 2) { // compute DC coefficient tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block); @@ -1030,7 +1032,8 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, tx_size, args, tmp_buf, tmp_stride); } else { #endif // CONFIG_SR_MODE - args->sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4; + args->sse = x->bsse[(plane << MAX_MIN_TX_IN_BLOCK) + + (block >> (tx_size << 1))] << 4; args->dist = args->sse; if (x->plane[plane].eobs[block]) { int64_t dc_correct = coeff[0] * coeff[0] - @@ -1052,7 +1055,8 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, } else { // skip forward transform x->plane[plane].eobs[block] = 0; - args->sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4; + args->sse = x->bsse[(plane << MAX_MIN_TX_IN_BLOCK) + + (block >> (tx_size << 1))] << 4; args->dist = args->sse; } } else { @@ -2102,8 +2106,8 @@ static int64_t handle_intrabc_mode(VP9_COMP *cpi, MACROBLOCK *x, int i; int_mv cur_dv; int64_t rd; - uint8_t skip_txfm[MAX_MB_PLANE << 2] = {0}; - int64_t bsse[MAX_MB_PLANE << 2] = {0}; + uint8_t skip_txfm[MAX_MB_PLANE << MAX_MIN_TX_IN_BLOCK] = {0}; + int64_t bsse[MAX_MB_PLANE << MAX_MIN_TX_IN_BLOCK] = {0}; int skip_txfm_sb = 0; int64_t skip_sse_sb = INT64_MAX; @@ -3579,8 +3583,9 @@ static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCKD *xd, int i, for (idy = 0; idy < num_4x4_blocks_high; ++idy) for (idx = 0; idx < num_4x4_blocks_wide; ++idx) - vpx_memcpy(&mic->bmi[i + idy * 2 + idx], - &mic->bmi[i], sizeof(mic->bmi[i])); + if (idx || idy) + vpx_memcpy(&mic->bmi[i + idy * 2 + idx], + &mic->bmi[i], sizeof(mic->bmi[i])); return cost_mv_ref(cpi, mode, mbmi->mode_context[mbmi->ref_frame[0]]) + thismvcost; @@ -4783,8 +4788,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, step_param = cpi->mv_step_param; } - if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) { - int boffset = 2 * (b_width_log2_lookup[BLOCK_64X64] - + if (cpi->sf.adaptive_motion_search && bsize < BLOCK_LARGEST) { + int boffset = 2 * (b_width_log2_lookup[BLOCK_LARGEST] - MIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize])); step_param = MAX(step_param, boffset); } @@ -5108,9 +5113,9 @@ static void do_masked_motion_search(VP9_COMP *cpi, MACROBLOCK *x, } // TODO(debargha): is show_frame needed here? - if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 && + if (cpi->sf.adaptive_motion_search && bsize < BLOCK_LARGEST && cm->show_frame) { - int boffset = 2 * (b_width_log2_lookup[BLOCK_64X64] - + int boffset = 2 * (b_width_log2_lookup[BLOCK_LARGEST] - MIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize])); step_param = MAX(step_param, boffset); } @@ -5274,13 +5279,16 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int_mv single_newmv[MAX_REF_FRAMES]; #endif // CONFIG_NEW_INTER #if CONFIG_VP9_HIGHBITDEPTH - DECLARE_ALIGNED_ARRAY(16, uint16_t, tmp_buf16, MAX_MB_PLANE * 64 * 64); - DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf8, MAX_MB_PLANE * 64 * 64); + DECLARE_ALIGNED_ARRAY(16, uint16_t, tmp_buf16, MAX_MB_PLANE * + CODING_UNIT_SIZE * CODING_UNIT_SIZE); + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf8, MAX_MB_PLANE * + CODING_UNIT_SIZE * CODING_UNIT_SIZE); uint8_t *tmp_buf; #else - DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64); + DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * + CODING_UNIT_SIZE * CODING_UNIT_SIZE); #endif // CONFIG_VP9_HIGHBITDEPTH - const int tmp_buf_sz = 64 * 64; + const int tmp_buf_sz = CODING_UNIT_SIZE * CODING_UNIT_SIZE; int pred_exists = 0; int intpel_mv; int64_t rd, tmp_rd, best_rd = INT64_MAX; @@ -5292,8 +5300,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int rate_mv_tmp = 0; #endif // CONFIG_INTERINTRA || CONFIG_WEDGE_PARTITION INTERP_FILTER best_filter = SWITCHABLE; - uint8_t skip_txfm[MAX_MB_PLANE << 2] = {0}; - int64_t bsse[MAX_MB_PLANE << 2] = {0}; + uint8_t skip_txfm[MAX_MB_PLANE << MAX_MIN_TX_IN_BLOCK] = {0}; + int64_t bsse[MAX_MB_PLANE << MAX_MIN_TX_IN_BLOCK] = {0}; int bsl = mi_width_log2_lookup[bsize]; int pred_filter_search = cpi->sf.cb_pred_filter_search ? @@ -5631,7 +5639,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } else { for (j = 0; j < MAX_MB_PLANE; j++) { xd->plane[j].dst.buf = tmp_buf + j * tmp_buf_sz; - xd->plane[j].dst.stride = 64; + xd->plane[j].dst.stride = CODING_UNIT_SIZE; } } vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); @@ -5996,7 +6004,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // again temporarily set the buffers to local memory to prevent a memcpy for (i = 0; i < MAX_MB_PLANE; i++) { xd->plane[i].dst.buf = tmp_buf + i * tmp_buf_sz; - xd->plane[i].dst.stride = 64; + xd->plane[i].dst.stride = CODING_UNIT_SIZE; } } rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0); @@ -6695,7 +6703,7 @@ static void update_rd_thresh_fact(VP9_COMP *cpi, int bsize, int mode; for (mode = 0; mode < top_mode; ++mode) { const BLOCK_SIZE min_size = MAX(bsize - 1, BLOCK_4X4); - const BLOCK_SIZE max_size = MIN(bsize + 2, BLOCK_64X64); + const BLOCK_SIZE max_size = MIN(bsize + 2, BLOCK_LARGEST); BLOCK_SIZE bs; for (bs = min_size; bs <= max_size; ++bs) { int *const fact = &cpi->rd.thresh_freq_fact[bs][mode]; @@ -7902,7 +7910,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, TX_SIZE best_tx_size; int rate2_tx, this_skip2_tx = 0; int64_t distortion2_tx, bestrd_tx = INT64_MAX; - uint8_t tmp_zcoeff_blk[256]; + uint8_t tmp_zcoeff_blk[(CODING_UNIT_SIZE * CODING_UNIT_SIZE) / 16]; #endif // CONFIG_EXT_TX *mbmi = *inter_ref_list[copy_mode - REF0]; diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c index 16f89ce23..a239fde8a 100644 --- a/vp9/encoder/vp9_segmentation.c +++ b/vp9/encoder/vp9_segmentation.c @@ -322,13 +322,13 @@ void vp9_choose_segmap_coding_method(VP9_COMMON *cm, MACROBLOCKD *xd) { mi_ptr = cm->mi + tile.mi_col_start; for (mi_row = 0; mi_row < cm->mi_rows; - mi_row += 8, mi_ptr += 8 * cm->mi_stride) { + mi_row += MI_BLOCK_SIZE, mi_ptr += MI_BLOCK_SIZE * cm->mi_stride) { MODE_INFO *mi = mi_ptr; for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end; - mi_col += 8, mi += 8) + mi_col += MI_BLOCK_SIZE, mi += MI_BLOCK_SIZE) count_segs_sb(cm, xd, &tile, mi, no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, - mi_row, mi_col, BLOCK_64X64); + mi_row, mi_col, BLOCK_LARGEST); } } diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 26b861f92..b940b51f5 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -340,7 +340,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->less_rectangular_check = 0; sf->use_square_partition_only = 0; sf->auto_min_max_partition_size = NOT_IN_USE; - sf->max_partition_size = BLOCK_64X64; + sf->max_partition_size = BLOCK_LARGEST; sf->min_partition_size = BLOCK_4X4; sf->adjust_partitioning_from_last_frame = 0; sf->last_partitioning_redo_frequency = 4;