// This is used as a reference when computing the source variance for the
// purposes of activity masking.
// Eventually this should be replaced by custom no-reference routines,
// which will be faster.
-static const uint8_t VP9_VAR_OFFS[64] = {
+static const uint8_t VP9_VAR_OFFS[CODING_UNIT_SIZE] = {
+ 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128
};
#if CONFIG_VP9_HIGHBITDEPTH
-static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = {
+static const uint16_t VP9_HIGH_VAR_OFFS_8[CODING_UNIT_SIZE] = {
+ 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128
};
-static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = {
+static const uint16_t VP9_HIGH_VAR_OFFS_10[CODING_UNIT_SIZE] = {
+ 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4,
- 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4, 128*4
};
-static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = {
+static const uint16_t VP9_HIGH_VAR_OFFS_12[CODING_UNIT_SIZE] = {
+ 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16,
- 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16, 128*16
};
#endif // CONFIG_VP9_HIGHBITDEPTH
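These flat 128 buffers serve as a zero-difference reference when estimating per-pixel source variance, so they only need to cover one row of the largest block: the variance functions are called with a reference stride of 0, which makes every row of the comparison read the same CODING_UNIT_SIZE samples. A sketch of the consuming call, roughly as it appears upstream in vp9_encodeframe.c (illustration only, not part of this change):

static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi,
                                              const struct buf_2d *ref,
                                              BLOCK_SIZE bs) {
  unsigned int sse;
  // Reference stride 0: compare every source row against the flat 128 row.
  const unsigned int var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
                                              VP9_VAR_OFFS, 0, &sse);
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}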
vp9_clear_system_state();
vp9_zero(vt);
- set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
+ set_offsets(cpi, tile, mi_row, mi_col, BLOCK_LARGEST);
if (xd->mb_to_right_edge < 0)
pixels_wide += (xd->mb_to_right_edge >> 3);
#if CONFIG_INTERINTRA
xd->mi[0].src_mi->mbmi.ref_frame[1] = NONE;
#endif // CONFIG_INTERINTRA
- xd->mi[0].src_mi->mbmi.sb_type = BLOCK_64X64;
+ xd->mi[0].src_mi->mbmi.sb_type = BLOCK_LARGEST;
vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv,
xd->mi[0].src_mi->mbmi.ref_mvs[LAST_FRAME],
&nearest_mv, &near_mv);
xd->mi[0].src_mi->mbmi.mv[0] = nearest_mv;
- vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_64X64);
+ vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_LARGEST);
d = xd->plane[0].dst.buf;
dp = xd->plane[0].dst.stride;
}
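This branch predicts the whole coding unit from LAST_FRAME using the nearest reference MV and then points d/dp at that prediction, which the variance-tree code below compares against the source. In the upstream encoder the key-frame path instead falls back to the flat buffers defined above, along the lines of:

} else {
  // Key frames have no inter prediction; compare against the flat reference.
  d = VP9_VAR_OFFS;
  dp = 0;
}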
static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col,
- ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
- ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
- PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
+ ENTROPY_CONTEXT a[(CODING_UNIT_SIZE >> 2) *
+ MAX_MB_PLANE],
+ ENTROPY_CONTEXT l[(CODING_UNIT_SIZE >> 2) *
+ MAX_MB_PLANE],
+ PARTITION_CONTEXT sa[CODING_UNIT_SIZE >> 3],
+ PARTITION_CONTEXT sl[CODING_UNIT_SIZE >> 3],
BLOCK_SIZE bsize) {
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
}
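The replacement bounds follow directly from the context granularity: across the width of one coding unit there is one ENTROPY_CONTEXT per 4-pixel column (per plane) and one PARTITION_CONTEXT per 8x8 mi unit. A minimal sanity sketch, assuming CODING_UNIT_SIZE is the superblock width in pixels (64 in the default configuration):

/* Sketch only: the generalized bounds reduce to the old literals. */
assert((CODING_UNIT_SIZE >> 2) == 16);  /* was 16 * MAX_MB_PLANE entries */
assert((CODING_UNIT_SIZE >> 3) == 8);   /* was sa[8], sl[8] */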
static void save_context(VP9_COMP *cpi, int mi_row, int mi_col,
- ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
- ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
- PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
+ ENTROPY_CONTEXT a[(CODING_UNIT_SIZE >> 2) *
+ MAX_MB_PLANE],
+ ENTROPY_CONTEXT l[(CODING_UNIT_SIZE >> 2) *
+ MAX_MB_PLANE],
+ PARTITION_CONTEXT sa[CODING_UNIT_SIZE >> 3],
+ PARTITION_CONTEXT sl[CODING_UNIT_SIZE >> 3],
BLOCK_SIZE bsize) {
const MACROBLOCK *const x = &cpi->mb;
const MACROBLOCKD *const xd = &x->e_mbd;
int i, pl;
PARTITION_TYPE partition = PARTITION_NONE;
BLOCK_SIZE subsize;
- ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
- PARTITION_CONTEXT sl[8], sa[8];
+ ENTROPY_CONTEXT l[(CODING_UNIT_SIZE / 4) * MAX_MB_PLANE];
+ ENTROPY_CONTEXT a[(CODING_UNIT_SIZE / 4) * MAX_MB_PLANE];
+ PARTITION_CONTEXT sl[CODING_UNIT_SIZE >> 3], sa[CODING_UNIT_SIZE >> 3];
RD_COST last_part_rdc, none_rdc, chosen_rdc;
BLOCK_SIZE sub_subsize = BLOCK_4X4;
int splits_below = 0;
#if CONFIG_SUPERTX
int rt_nocoef = 0;
#endif
- ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
- PARTITION_CONTEXT sl[8], sa[8];
+ ENTROPY_CONTEXT l[(CODING_UNIT_SIZE / 4) * MAX_MB_PLANE];
+ ENTROPY_CONTEXT a[(CODING_UNIT_SIZE / 4) * MAX_MB_PLANE];
+ PARTITION_CONTEXT sl[CODING_UNIT_SIZE >> 3], sa[CODING_UNIT_SIZE >> 3];
if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
continue;
// We must have chosen a partitioning and encoding or we'll fail later on.
// No other opportunities for success.
- if (bsize == BLOCK_64X64)
+ if (bsize == BLOCK_LARGEST)
assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);
if (do_recon) {
- int output_enabled = (bsize == BLOCK_64X64);
+ int output_enabled = (bsize == BLOCK_LARGEST);
// Check the projected output rate for this SB against its target
// and if necessary apply a Q delta using segmentation to get
const int col8x8_remaining = tile->mi_col_end - mi_col;
int bh, bw;
BLOCK_SIZE min_size = BLOCK_4X4;
- BLOCK_SIZE max_size = BLOCK_64X64;
+ BLOCK_SIZE max_size = BLOCK_LARGEST;
int i = 0;
int bs_hist[BLOCK_SIZES] = {0};
// Trap case where we do not have a prediction.
if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) {
// Default "min to max" and "max to min"
- min_size = BLOCK_64X64;
+ min_size = BLOCK_LARGEST;
max_size = BLOCK_4X4;
// NOTE: each call to get_sb_partition_size_range() uses the previous
BLOCK_SIZE bs, min_size, max_size;
- min_size = BLOCK_64X64;
+ min_size = BLOCK_LARGEST;
max_size = BLOCK_4X4;
if (prev_mi) {
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2;
- ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
- PARTITION_CONTEXT sl[8], sa[8];
+ ENTROPY_CONTEXT l[(CODING_UNIT_SIZE / 4) * MAX_MB_PLANE];
+ ENTROPY_CONTEXT a[(CODING_UNIT_SIZE / 4) * MAX_MB_PLANE];
+ PARTITION_CONTEXT sl[CODING_UNIT_SIZE >> 3], sa[CODING_UNIT_SIZE >> 3];
TOKENEXTRA *tp_orig = *tp;
PICK_MODE_CONTEXT *ctx = &pc_tree->none;
int i, pl;
if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
pc_tree->index != 3) {
- int output_enabled = (bsize == BLOCK_64X64);
+ int output_enabled = (bsize == BLOCK_LARGEST);
// Check the projected output rate for this SB against its target
// and if necessary apply a Q delta using segmentation to get
encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize, pc_tree);
}
- if (bsize == BLOCK_64X64) {
+ if (bsize == BLOCK_LARGEST) {
assert(tp_orig < *tp);
assert(best_rdc.rate < INT_MAX);
assert(best_rdc.dist < INT64_MAX);
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
SPEED_FEATURES *const sf = &cpi->sf;
int mi_col;
+ const int leaf_nodes = 64;
// Initialize the left context for the new SB row
vpx_memset(&xd->left_context, 0, sizeof(xd->left_context));
MODE_INFO *mi = cm->mi + idx_str;
if (sf->adaptive_pred_interp_filter) {
- for (i = 0; i < 64; ++i)
+ for (i = 0; i < leaf_nodes; ++i)
cpi->leaf_tree[i].pred_interp_filter = SWITCHABLE;
- for (i = 0; i < 64; ++i) {
+ for (i = 0; i < leaf_nodes; ++i) {
cpi->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
cpi->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
cpi->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
cpi->mb.source_variance = UINT_MAX;
if (sf->partition_search_type == FIXED_PARTITION) {
- set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
+ set_offsets(cpi, tile, mi_row, mi_col, BLOCK_LARGEST);
set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col,
sf->always_this_block_size);
- rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
+ rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_LARGEST,
&dummy_rate, &dummy_dist,
#if CONFIG_SUPERTX
&dummy_rate_nocoef,
1, cpi->pc_root);
} else if (cpi->partition_search_skippable_frame) {
BLOCK_SIZE bsize;
- set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
+ set_offsets(cpi, tile, mi_row, mi_col, BLOCK_LARGEST);
bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col);
set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
- rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
+ rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_LARGEST,
&dummy_rate, &dummy_dist,
#if CONFIG_SUPERTX
&dummy_rate_nocoef,
} else if (sf->partition_search_type == VAR_BASED_PARTITION &&
cm->frame_type != KEY_FRAME) {
choose_partitioning(cpi, tile, mi_row, mi_col);
- rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
+ rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_LARGEST,
&dummy_rate, &dummy_dist,
#if CONFIG_SUPERTX
&dummy_rate_nocoef,
} else {
// If required set upper and lower partition size limits
if (sf->auto_min_max_partition_size) {
- set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
+ set_offsets(cpi, tile, mi_row, mi_col, BLOCK_LARGEST);
rd_auto_partition_range(cpi, tile, mi_row, mi_col,
&sf->min_partition_size,
&sf->max_partition_size);
}
- rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rdc,
+ rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_LARGEST,
+ &dummy_rdc,
#if CONFIG_SUPERTX
&dummy_rate_nocoef,
#endif
var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
dst, pd->dst.stride, &sse);
- x->bsse[(i << 2) + block_idx] = sse;
+ x->bsse[(i << MAX_MIN_TX_IN_BLOCK) + block_idx] = sse;
sum_sse += sse;
- x->skip_txfm[(i << 2) + block_idx] = 0;
+ x->skip_txfm[(i << MAX_MIN_TX_IN_BLOCK) + block_idx] = 0;
if (!x->select_tx_size) {
// Check if all ac coefficients can be quantized to zero.
if (var < ac_thr || var == 0) {
- x->skip_txfm[(i << 2) + block_idx] = 2;
+ x->skip_txfm[(i << MAX_MIN_TX_IN_BLOCK) + block_idx] = 2;
// Check if dc coefficient can be quantized to zero.
if (sse - var < dc_thr || sse == var) {
- x->skip_txfm[(i << 2) + block_idx] = 1;
+ x->skip_txfm[(i << MAX_MIN_TX_IN_BLOCK) + block_idx] = 1;
if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
low_err_skip = 1;
}
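The skip_txfm and bsse arrays are indexed plane-major with (1 << MAX_MIN_TX_IN_BLOCK) slots per plane (the old hard-coded shift by 2 gave four), and a transform block maps to its slot by dividing the block number by the transform area in 4x4 units. A hypothetical helper, not part of the patch, that spells out the packed index used in the hunks below:

static INLINE int skip_txfm_index(int plane, int block, TX_SIZE tx_size) {
  /* Hypothetical illustration: plane-major packing with
   * (1 << MAX_MIN_TX_IN_BLOCK) slots per plane. */
  return (plane << MAX_MIN_TX_IN_BLOCK) + (block >> (tx_size << 1));
}

The stored values keep their existing meaning: 0 forces the full forward transform, 2 marks a block whose AC coefficients are expected to quantize to zero, and 1 marks one whose DC coefficient can be dropped as well.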
#endif // CONFIG_SR_MODE
- if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) {
+ if (x->skip_txfm[(plane << MAX_MIN_TX_IN_BLOCK) +
+ (block >> (tx_size << 1))] == 0) {
// full forward transform and quantization
#if CONFIG_NEW_QUANT
if (x->quant_fp)
#if CONFIG_SR_MODE
}
#endif // CONFIG_SR_MODE
- } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) {
+ } else if (x->skip_txfm[(plane << MAX_MIN_TX_IN_BLOCK) +
+ (block >> (tx_size << 1))] == 2) {
// compute DC coefficient
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
tx_size, args, tmp_buf, tmp_stride);
} else {
#endif // CONFIG_SR_MODE
- args->sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
+ args->sse = x->bsse[(plane << MAX_MIN_TX_IN_BLOCK) +
+ (block >> (tx_size << 1))] << 4;
args->dist = args->sse;
if (x->plane[plane].eobs[block]) {
int64_t dc_correct = coeff[0] * coeff[0] -
} else {
// skip forward transform
x->plane[plane].eobs[block] = 0;
- args->sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
+ args->sse = x->bsse[(plane << MAX_MIN_TX_IN_BLOCK) +
+ (block >> (tx_size << 1))] << 4;
args->dist = args->sse;
}
} else {
int i;
int_mv cur_dv;
int64_t rd;
- uint8_t skip_txfm[MAX_MB_PLANE << 2] = {0};
- int64_t bsse[MAX_MB_PLANE << 2] = {0};
+ uint8_t skip_txfm[MAX_MB_PLANE << MAX_MIN_TX_IN_BLOCK] = {0};
+ int64_t bsse[MAX_MB_PLANE << MAX_MIN_TX_IN_BLOCK] = {0};
int skip_txfm_sb = 0;
int64_t skip_sse_sb = INT64_MAX;
for (idy = 0; idy < num_4x4_blocks_high; ++idy)
for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
- vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
- &mic->bmi[i], sizeof(mic->bmi[i]));
+ if (idx || idy)
+ vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
+ &mic->bmi[i], sizeof(mic->bmi[i]));
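The added guard skips the (idx, idy) == (0, 0) case, where source and destination are the same bmi[] element; besides being redundant, that self-copy is an overlapping memcpy, which the C library leaves undefined.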
return cost_mv_ref(cpi, mode,
mbmi->mode_context[mbmi->ref_frame[0]]) + thismvcost;
step_param = cpi->mv_step_param;
}
- if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
- int boffset = 2 * (b_width_log2_lookup[BLOCK_64X64] -
+ if (cpi->sf.adaptive_motion_search && bsize < BLOCK_LARGEST) {
+ int boffset = 2 * (b_width_log2_lookup[BLOCK_LARGEST] -
MIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
step_param = MAX(step_param, boffset);
}
}
// TODO(debargha): is show_frame needed here?
- if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 &&
+ if (cpi->sf.adaptive_motion_search && bsize < BLOCK_LARGEST &&
cm->show_frame) {
- int boffset = 2 * (b_width_log2_lookup[BLOCK_64X64] -
+ int boffset = 2 * (b_width_log2_lookup[BLOCK_LARGEST] -
MIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
step_param = MAX(step_param, boffset);
}
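For orientation: if b_width_log2_lookup is expressed in 4x4 units (4 for a 64-wide block, 1 for BLOCK_8X8), the adjustment raises step_param by 2 * (4 - 1) = 6 for an 8x8 block and by 2 * (4 - 3) = 2 for a 32x32 block, so smaller blocks begin the search with a coarser step, relying on the motion already bracketed at the larger size. Using BLOCK_LARGEST keeps that scaling correct if the largest coding unit grows.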
int_mv single_newmv[MAX_REF_FRAMES];
#endif // CONFIG_NEW_INTER
#if CONFIG_VP9_HIGHBITDEPTH
- DECLARE_ALIGNED_ARRAY(16, uint16_t, tmp_buf16, MAX_MB_PLANE * 64 * 64);
- DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf8, MAX_MB_PLANE * 64 * 64);
+ DECLARE_ALIGNED_ARRAY(16, uint16_t, tmp_buf16, MAX_MB_PLANE *
+ CODING_UNIT_SIZE * CODING_UNIT_SIZE);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf8, MAX_MB_PLANE *
+ CODING_UNIT_SIZE * CODING_UNIT_SIZE);
uint8_t *tmp_buf;
#else
- DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE *
+ CODING_UNIT_SIZE * CODING_UNIT_SIZE);
#endif // CONFIG_VP9_HIGHBITDEPTH
- const int tmp_buf_sz = 64 * 64;
+ const int tmp_buf_sz = CODING_UNIT_SIZE * CODING_UNIT_SIZE;
int pred_exists = 0;
int intpel_mv;
int64_t rd, tmp_rd, best_rd = INT64_MAX;
int rate_mv_tmp = 0;
#endif // CONFIG_INTERINTRA || CONFIG_WEDGE_PARTITION
INTERP_FILTER best_filter = SWITCHABLE;
- uint8_t skip_txfm[MAX_MB_PLANE << 2] = {0};
- int64_t bsse[MAX_MB_PLANE << 2] = {0};
+ uint8_t skip_txfm[MAX_MB_PLANE << MAX_MIN_TX_IN_BLOCK] = {0};
+ int64_t bsse[MAX_MB_PLANE << MAX_MIN_TX_IN_BLOCK] = {0};
int bsl = mi_width_log2_lookup[bsize];
int pred_filter_search = cpi->sf.cb_pred_filter_search ?
} else {
for (j = 0; j < MAX_MB_PLANE; j++) {
xd->plane[j].dst.buf = tmp_buf + j * tmp_buf_sz;
- xd->plane[j].dst.stride = 64;
+ xd->plane[j].dst.stride = CODING_UNIT_SIZE;
}
}
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
// again temporarily set the buffers to local memory to prevent a memcpy
for (i = 0; i < MAX_MB_PLANE; i++) {
xd->plane[i].dst.buf = tmp_buf + i * tmp_buf_sz;
- xd->plane[i].dst.stride = 64;
+ xd->plane[i].dst.stride = CODING_UNIT_SIZE;
}
}
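The stride substitutions here assume the temporary prediction buffer holds MAX_MB_PLANE back-to-back CODING_UNIT_SIZE x CODING_UNIT_SIZE planes. A sketch of the addressing that layout implies (illustration only, names hypothetical):

static INLINE uint8_t *tmp_pred_pixel(uint8_t *tmp_buf, int plane,
                                      int row, int col) {
  /* Hypothetical helper: plane p starts at tmp_buf + p * tmp_buf_sz and is
   * accessed with a stride of CODING_UNIT_SIZE. */
  const int tmp_buf_sz = CODING_UNIT_SIZE * CODING_UNIT_SIZE;
  return tmp_buf + plane * tmp_buf_sz + row * CODING_UNIT_SIZE + col;
}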
rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0);
int mode;
for (mode = 0; mode < top_mode; ++mode) {
const BLOCK_SIZE min_size = MAX(bsize - 1, BLOCK_4X4);
- const BLOCK_SIZE max_size = MIN(bsize + 2, BLOCK_64X64);
+ const BLOCK_SIZE max_size = MIN(bsize + 2, BLOCK_LARGEST);
BLOCK_SIZE bs;
for (bs = min_size; bs <= max_size; ++bs) {
int *const fact = &cpi->rd.thresh_freq_fact[bs][mode];
TX_SIZE best_tx_size;
int rate2_tx, this_skip2_tx = 0;
int64_t distortion2_tx, bestrd_tx = INT64_MAX;
- uint8_t tmp_zcoeff_blk[256];
+ uint8_t tmp_zcoeff_blk[(CODING_UNIT_SIZE * CODING_UNIT_SIZE) / 16];
#endif // CONFIG_EXT_TX
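The tmp_zcoeff_blk bound is one flag per 4x4 block in the coding unit: (CODING_UNIT_SIZE * CODING_UNIT_SIZE) / 16 reproduces the old literal 256 for the default 64-pixel case.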
*mbmi = *inter_ref_list[copy_mode - REF0];