INTRA_FILTER intra_filter;
#endif // CONFIG_EXT_INTRA
+#if CONFIG_OBMC
+ int8_t obmc;
+#endif // CONFIG_OBMC
+
int_mv mv[2];
int_mv pred_mv[2];
#if CONFIG_REF_MV
return mbmi->ref_frame[1] > INTRA_FRAME;
}
+#if CONFIG_OBMC
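+// OBMC may only be used (and its flag only coded) for blocks of size
+// BLOCK_8X8 and above.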
+static INLINE int is_obmc_allowed(const MB_MODE_INFO *mbmi) {
+ return (mbmi->sb_type >= BLOCK_8X8);
+}
+#endif // CONFIG_OBMC
+
PREDICTION_MODE vp10_left_block_mode(const MODE_INFO *cur_mi,
const MODE_INFO *left_mi, int b);
};
#endif // CONFIG_EXT_INTER
+#if CONFIG_OBMC
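+// Default probabilities for the OBMC flag, indexed by block size. Entries
+// for sizes below BLOCK_8X8 are placeholders, since the flag is never coded
+// for those sizes (see is_obmc_allowed()).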
+static const vpx_prob default_obmc_prob[BLOCK_SIZES] = {
+ 255, 255, 255, 151, 153, 144, 178, 165, 160, 207, 195, 168, 244,
+};
+#endif // CONFIG_OBMC
+
/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */
const vpx_tree_index vp10_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = {
-DC_PRED, 2, /* 0 = DC_NODE */
#endif // CONFIG_EXT_INTER
#endif // CONFIG_REF_MV
vp10_copy(fc->inter_mode_probs, default_inter_mode_probs);
+#if CONFIG_OBMC
+ vp10_copy(fc->obmc_prob, default_obmc_prob);
+#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
vp10_copy(fc->inter_compound_mode_probs, default_inter_compound_mode_probs);
#endif // CONFIG_EXT_INTER
counts->inter_mode[i], fc->inter_mode_probs[i]);
#endif
+#if CONFIG_OBMC
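+  // Adapt the per-size OBMC flag probabilities from this frame's counts;
+  // sizes below BLOCK_8X8 never code the flag, so they are left untouched.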
+ for (i = BLOCK_8X8; i < BLOCK_SIZES; ++i)
+ fc->obmc_prob[i] = mode_mv_merge_probs(pre_fc->obmc_prob[i],
+ counts->obmc[i]);
+#endif // CONFIG_OBMC
+
#if CONFIG_SUPERTX
for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) {
int j;
vpx_prob inter_compound_mode_probs[INTER_MODE_CONTEXTS]
[INTER_COMPOUND_MODES - 1];
#endif // CONFIG_EXT_INTER
+#if CONFIG_OBMC
+ vpx_prob obmc_prob[BLOCK_SIZES];
+#endif // CONFIG_OBMC
vpx_prob intra_inter_prob[INTRA_INTER_CONTEXTS];
vpx_prob comp_inter_prob[COMP_INTER_CONTEXTS];
vpx_prob single_ref_prob[REF_CONTEXTS][SINGLE_REFS-1];
#if CONFIG_EXT_INTER
unsigned int inter_compound_mode[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES];
#endif // CONFIG_EXT_INTER
+#if CONFIG_OBMC
+ unsigned int obmc[BLOCK_SIZES][2];
+#endif // CONFIG_OBMC
unsigned int intra_inter[INTRA_INTER_CONTEXTS][2];
unsigned int comp_inter[COMP_INTER_CONTEXTS][2];
unsigned int single_ref[REF_CONTEXTS][SINGLE_REFS-1][2];
#include "vp10/common/blockd.h"
#include "vp10/common/reconinter.h"
#include "vp10/common/reconintra.h"
+#if CONFIG_OBMC
+#include "vp10/common/onyxc_int.h"
+#endif // CONFIG_OBMC
#if CONFIG_VP9_HIGHBITDEPTH
void vp10_highbd_build_inter_predictor(const uint8_t *src, int src_stride,
sf, w, h, ref, interp_filter, sf->x_step_q4, sf->y_step_q4);
}
-void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
+void build_inter_predictors(MACROBLOCKD *xd, int plane,
+#if CONFIG_OBMC
+ int mi_col_offset, int mi_row_offset,
+#endif // CONFIG_OBMC
+ int block,
int bw, int bh,
int x, int y, int w, int h,
int mi_x, int mi_y) {
struct macroblockd_plane *const pd = &xd->plane[plane];
+#if CONFIG_OBMC
+ const MODE_INFO *mi = xd->mi[mi_col_offset + xd->mi_stride * mi_row_offset];
+#else
const MODE_INFO *mi = xd->mi[0];
+#endif // CONFIG_OBMC
const int is_compound = has_second_ref(&mi->mbmi);
const INTERP_FILTER interp_filter = mi->mbmi.interp_filter;
int ref;
assert(pw * num_4x4_w == bw && ph * num_4x4_h == bh);
for (y = 0; y < num_4x4_h; ++y)
for (x = 0; x < num_4x4_w; ++x)
- build_inter_predictors(xd, plane, y * 2 + x, bw, bh,
+ build_inter_predictors(xd, plane,
+#if CONFIG_OBMC
+ 0, 0,
+#endif // CONFIG_OBMC
+ y * 2 + x, bw, bh,
4 * x, 4 * y, pw, ph, mi_x, mi_y);
} else {
- build_inter_predictors(xd, plane, 0, bw, bh,
+ build_inter_predictors(xd, plane,
+#if CONFIG_OBMC
+ 0, 0,
+#endif // CONFIG_OBMC
+ 0, bw, bh,
0, 0, bw, bh, mi_x, mi_y);
}
}
}
}
-
void vp10_build_masked_inter_predictor_complex(
MACROBLOCKD *xd,
uint8_t *dst, int dst_stride, uint8_t *dst2, int dst2_stride,
const int bw = 4 * num_4x4_w;
const int bh = 4 * num_4x4_h;
- build_inter_predictors(xd, plane, block, bw, bh,
+ build_inter_predictors(xd, plane,
+#if CONFIG_OBMC
+ 0, 0,
+#endif // CONFIG_OBMC
+ block, bw, bh,
0, 0, bw, bh,
mi_x, mi_y);
}
}
#endif // CONFIG_SUPERTX
+
+#if CONFIG_OBMC
+// obmc_mask_N[is_neighbor_predictor][overlap_position]
+static const uint8_t obmc_mask_1[2][1] = {
+ { 55},
+ { 9}
+};
+
+static const uint8_t obmc_mask_2[2][2] = {
+ { 45, 62},
+ { 19, 2}
+};
+
+static const uint8_t obmc_mask_4[2][4] = {
+ { 39, 50, 59, 64},
+ { 25, 14, 5, 0}
+};
+
+static const uint8_t obmc_mask_8[2][8] = {
+ { 36, 42, 48, 53, 57, 61, 63, 64},
+ { 28, 22, 16, 11, 7, 3, 1, 0}
+};
+
+static const uint8_t obmc_mask_16[2][16] = {
+ { 34, 37, 40, 43, 46, 49, 52, 54, 56, 58, 60, 61, 63, 64, 64, 64},
+ { 30, 27, 24, 21, 18, 15, 12, 10, 8, 6, 4, 3, 1, 0, 0, 0}
+};
+
+static const uint8_t obmc_mask_32[2][32] = {
+ { 33, 35, 36, 38, 40, 41, 43, 44, 45, 47, 48, 50, 51, 52, 53, 55,
+ 56, 57, 58, 59, 60, 60, 61, 62, 62, 63, 63, 64, 64, 64, 64, 64},
+ { 31, 29, 28, 26, 24, 23, 21, 20, 19, 17, 16, 14, 13, 12, 11, 9,
+ 8, 7, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0, 0, 0, 0}
+};
+
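+// Returns, via 'mask', the pair of 1-D blending weights for an overlap of
+// 'length' pixels: mask[0] weighs the current block's prediction and mask[1]
+// the neighbor's. Each weight pair sums to 64, so the blend
+//   (mask[0][k] * bmc + mask[1][k] * tmp + 32) >> 6
+// is a normalized weighted average.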
+void setup_obmc_mask(int length, const uint8_t *mask[2]) {
+ switch (length) {
+ case 1:
+ mask[0] = obmc_mask_1[0];
+ mask[1] = obmc_mask_1[1];
+ break;
+ case 2:
+ mask[0] = obmc_mask_2[0];
+ mask[1] = obmc_mask_2[1];
+ break;
+ case 4:
+ mask[0] = obmc_mask_4[0];
+ mask[1] = obmc_mask_4[1];
+ break;
+ case 8:
+ mask[0] = obmc_mask_8[0];
+ mask[1] = obmc_mask_8[1];
+ break;
+ case 16:
+ mask[0] = obmc_mask_16[0];
+ mask[1] = obmc_mask_16[1];
+ break;
+ case 32:
+ mask[0] = obmc_mask_32[0];
+ mask[1] = obmc_mask_32[1];
+ break;
+ default:
+ mask[0] = obmc_mask_32[0];
+ mask[1] = obmc_mask_32[1];
+ assert(0);
+ break;
+ }
+}
+
+// This function combines the motion-compensated predictions generated by the
+// top/left neighboring blocks' inter predictors with the regular inter
+// prediction. We assume the original prediction (bmc) is stored in
+// xd->plane[].dst.buf. When use_tmp_dst_buf is set, the blended result is
+// written to final_buf instead of back into xd->plane[].dst.buf.
+void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
+ MACROBLOCKD *xd, int mi_row, int mi_col,
+ int use_tmp_dst_buf,
+ uint8_t *final_buf[MAX_MB_PLANE],
+ int final_stride[MAX_MB_PLANE],
+ uint8_t *tmp_buf1[MAX_MB_PLANE],
+ int tmp_stride1[MAX_MB_PLANE],
+ uint8_t *tmp_buf2[MAX_MB_PLANE],
+ int tmp_stride2[MAX_MB_PLANE]) {
+ const TileInfo *const tile = &xd->tile;
+ BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ int plane, i, mi_step;
+#if CONFIG_VP9_HIGHBITDEPTH
+ int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ if (use_tmp_dst_buf) {
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const struct macroblockd_plane *pd = &xd->plane[plane];
+ int bw = (xd->n8_w * 8) >> pd->subsampling_x;
+ int bh = (xd->n8_h * 8) >> pd->subsampling_y;
+ int row;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (is_hbd) {
+ uint16_t *final_buf16 = CONVERT_TO_SHORTPTR(final_buf[plane]);
+ uint16_t *bmc_buf16 = CONVERT_TO_SHORTPTR(pd->dst.buf);
+ for (row = 0; row < bh; ++row)
+ memcpy(final_buf16 + row * final_stride[plane],
+ bmc_buf16 + row * pd->dst.stride, bw * sizeof(uint16_t));
+ } else {
+#endif
+ for (row = 0; row < bh; ++row)
+ memcpy(final_buf[plane] + row * final_stride[plane],
+ pd->dst.buf + row * pd->dst.stride, bw);
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+ }
+
+ // handle above row
+ for (i = 0; mi_row > 0 && i < VPXMIN(xd->n8_w, cm->mi_cols - mi_col);
+ i += mi_step) {
+ int mi_row_offset = -1;
+ int mi_col_offset = i;
+ int overlap;
+ MODE_INFO *above_mi = xd->mi[mi_col_offset +
+ mi_row_offset * xd->mi_stride];
+ MB_MODE_INFO *above_mbmi = &above_mi->mbmi;
+
+ mi_step = VPXMIN(xd->n8_w,
+ num_8x8_blocks_wide_lookup[above_mbmi->sb_type]);
+
+ if (!is_inter_block(above_mbmi))
+ continue;
+
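+    // The blended strip covers half the current block's height in pixels,
+    // further capped by the neighbor's height unless the neighbor is skipped.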
+ overlap = (above_mbmi->skip) ?
+ num_4x4_blocks_high_lookup[bsize] << 1 :
+ VPXMIN(num_4x4_blocks_high_lookup[bsize],
+ num_4x4_blocks_high_lookup[above_mbmi->sb_type]) << 1;
+
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const struct macroblockd_plane *pd = &xd->plane[plane];
+ int bw = (mi_step * 8) >> pd->subsampling_x;
+ int bh = overlap >> pd->subsampling_y;
+ int row, col;
+ int dst_stride = use_tmp_dst_buf ? final_stride[plane] : pd->dst.stride;
+ uint8_t *dst = use_tmp_dst_buf ?
+ &final_buf[plane][(i * 8) >> pd->subsampling_x] :
+ &pd->dst.buf[(i * 8) >> pd->subsampling_x];
+ int bmc_stride = pd->dst.stride;
+ uint8_t *bmc = &pd->dst.buf[(i * 8) >> pd->subsampling_x];
+ int tmp_stride = tmp_stride1[plane];
+ uint8_t *tmp = &tmp_buf1[plane][(i * 8) >> pd->subsampling_x];
+ const uint8_t *mask[2];
+
+ setup_obmc_mask(bh, mask);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (is_hbd) {
+ uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
+ uint16_t *bmc16 = CONVERT_TO_SHORTPTR(bmc);
+ uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
+
+ for (row = 0; row < bh; ++row) {
+ for (col = 0; col < bw; ++col) {
+ dst16[col] = (mask[0][row] * bmc16[col] + mask[1][row] * tmp16[col]
+ + 32) >> 6;
+ }
+ dst16 += dst_stride;
+ bmc16 += bmc_stride;
+ tmp16 += tmp_stride;
+ }
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ for (row = 0; row < bh; ++row) {
+ for (col = 0; col < bw; ++col) {
+ dst[col] = (mask[0][row] * bmc[col] + mask[1][row] * tmp[col] + 32)
+ >> 6;
+ }
+ dst += dst_stride;
+ bmc += bmc_stride;
+ tmp += tmp_stride;
+ }
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+ } // each mi in the above row
+
+ if (mi_col == 0 || (mi_col - 1 < tile->mi_col_start) ||
+ (mi_col - 1) >= tile->mi_col_end)
+ return;
+ // handle left column
+ for (i = 0; i < VPXMIN(xd->n8_h, cm->mi_rows - mi_row);
+ i += mi_step) {
+ int mi_row_offset = i;
+ int mi_col_offset = -1;
+ int overlap;
+ MODE_INFO *left_mi = xd->mi[mi_col_offset +
+ mi_row_offset * xd->mi_stride];
+ MB_MODE_INFO *left_mbmi = &left_mi->mbmi;
+
+ mi_step = VPXMIN(xd->n8_h,
+ num_8x8_blocks_high_lookup[left_mbmi->sb_type]);
+
+ if (!is_inter_block(left_mbmi))
+ continue;
+
+ overlap = (left_mbmi->skip) ?
+ num_4x4_blocks_wide_lookup[bsize] << 1 :
+ VPXMIN(num_4x4_blocks_wide_lookup[bsize],
+ num_4x4_blocks_wide_lookup[left_mbmi->sb_type]) << 1;
+
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const struct macroblockd_plane *pd = &xd->plane[plane];
+ int bw = overlap >> pd->subsampling_x;
+ int bh = (mi_step * 8) >> pd->subsampling_y;
+ int row, col;
+ int dst_stride = use_tmp_dst_buf ? final_stride[plane] : pd->dst.stride;
+ uint8_t *dst = use_tmp_dst_buf ?
+ &final_buf[plane][(i * 8 * dst_stride) >> pd->subsampling_y] :
+ &pd->dst.buf[(i * 8 * dst_stride) >> pd->subsampling_y];
+ int bmc_stride = pd->dst.stride;
+ uint8_t *bmc = &pd->dst.buf[(i * 8 * bmc_stride) >> pd->subsampling_y];
+ int tmp_stride = tmp_stride2[plane];
+ uint8_t *tmp = &tmp_buf2[plane]
+ [(i * 8 * tmp_stride) >> pd->subsampling_y];
+ const uint8_t *mask[2];
+
+ setup_obmc_mask(bw, mask);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (is_hbd) {
+ uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
+ uint16_t *bmc16 = CONVERT_TO_SHORTPTR(bmc);
+ uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
+
+ for (row = 0; row < bh; ++row) {
+ for (col = 0; col < bw; ++col) {
+          dst16[col] = (mask[0][col] * bmc16[col] + mask[1][col] * tmp16[col]
+                        + 32) >> 6;
+ }
+ dst16 += dst_stride;
+ bmc16 += bmc_stride;
+ tmp16 += tmp_stride;
+ }
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ for (row = 0; row < bh; ++row) {
+ for (col = 0; col < bw; ++col) {
+ dst[col] = (mask[0][col] * bmc[col] + mask[1][col] * tmp[col] + 32)
+ >> 6;
+ }
+ dst += dst_stride;
+ bmc += bmc_stride;
+ tmp += tmp_stride;
+ }
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+ } // each mi in the left column
+}
+#endif // CONFIG_OBMC
return res;
}
-void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
+void build_inter_predictors(MACROBLOCKD *xd, int plane,
+#if CONFIG_OBMC
+ int mi_col_offset, int mi_row_offset,
+#endif // CONFIG_OBMC
+ int block,
int bw, int bh,
int x, int y, int w, int h,
int mi_x, int mi_y);
return !intpel_mv;
}
#endif // CONFIG_EXT_INTERP
+
+#if CONFIG_OBMC
+void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
+ MACROBLOCKD *xd, int mi_row, int mi_col,
+ int use_tmp_dst_buf,
+ uint8_t *final_buf[MAX_MB_PLANE],
+ int final_stride[MAX_MB_PLANE],
+ uint8_t *tmp_buf1[MAX_MB_PLANE],
+ int tmp_stride1[MAX_MB_PLANE],
+ uint8_t *tmp_buf2[MAX_MB_PLANE],
+ int tmp_stride2[MAX_MB_PLANE]);
+#endif // CONFIG_OBMC
+
#ifdef __cplusplus
} // extern "C"
#endif
buf_stride, subpel_x, subpel_y;
uint8_t *ref_frame, *buf_ptr;
#if CONFIG_EXT_INTERP
- const int i_filter = IsInterpolatingFilter(xd->mi[0]->mbmi.interp_filter);
+ const int i_filter = IsInterpolatingFilter(interp_filter);
#endif // CONFIG_EXT_INTERP
// Get reference frame pointer, width and height.
xs = sf->x_step_q4;
ys = sf->y_step_q4;
} else {
+#if CONFIG_OBMC
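+      // For OBMC, clamp the (possibly neighboring block's) MV to the UMV
+      // border of the current superblock; the clamp also applies the
+      // subsampling scale, so the result is used directly below.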
+ const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, mv, bw, bh,
+ pd->subsampling_x,
+ pd->subsampling_y);
+#endif // CONFIG_OBMC
// Co-ordinate of containing block to pixel precision.
x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
x0_16 = x0 << SUBPEL_BITS;
y0_16 = y0 << SUBPEL_BITS;
+#if CONFIG_OBMC
+ scaled_mv.row = mv_q4.row;
+ scaled_mv.col = mv_q4.col;
+#else
scaled_mv.row = mv->row * (1 << (1 - pd->subsampling_y));
scaled_mv.col = mv->col * (1 << (1 - pd->subsampling_x));
+#endif // CONFIG_OBMC
xs = ys = 16;
}
subpel_x = scaled_mv.col & SUBPEL_MASK;
}
}
}
+
#if CONFIG_SUPERTX
static void dec_build_inter_predictors_sb_sub8x8(VP10Decoder *const pbi,
MACROBLOCKD *xd,
}
}
}
-#endif
+#endif // CONFIG_SUPERTX
+
+#if CONFIG_OBMC
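+// Builds the inter predictions of the neighboring blocks in the row above
+// into tmp_buf (stride tmp_stride), re-running their motion compensation
+// over the strip that overlaps the current block, for later blending by
+// vp10_build_obmc_inter_prediction().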
+static void dec_build_prediction_by_above_preds(VP10Decoder *const pbi,
+ MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ uint8_t *tmp_buf[MAX_MB_PLANE],
+ int tmp_stride[MAX_MB_PLANE]) {
+ VP10_COMMON *const cm = &pbi->common;
+ BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ int i, j, mi_step, ref;
+
+ if (mi_row == 0)
+ return;
+
+ for (i = 0; i < VPXMIN(xd->n8_w, cm->mi_cols - mi_col); i += mi_step) {
+ int mi_row_offset = -1;
+ int mi_col_offset = i;
+ int mi_x, mi_y, bw, bh;
+ const MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * cm->mi_stride];
+ const MB_MODE_INFO *mbmi = &mi->mbmi;
+ const BLOCK_SIZE sb_type = mbmi->sb_type;
+ const int is_compound = has_second_ref(mbmi);
+ const INTERP_FILTER interp_filter = mbmi->interp_filter;
+
+ mi_step = VPXMIN(xd->n8_w, num_8x8_blocks_wide_lookup[sb_type]);
+
+ if (!is_inter_block(mbmi))
+ continue;
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ struct macroblockd_plane *const pd = &xd->plane[j];
+ setup_pred_plane(&pd->dst,
+ tmp_buf[j], tmp_stride[j],
+ 0, i, NULL,
+ pd->subsampling_x, pd->subsampling_y);
+ }
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
+ RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
+
+ xd->block_refs[ref] = ref_buf;
+      if (!vp10_is_valid_scale(&ref_buf->sf))
+ vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
+ "Reference frame has invalid dimensions");
+ vp10_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col + i,
+ &ref_buf->sf);
+ }
+
+ xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8);
+ mi_x = (mi_col + i) << MI_SIZE_LOG2;
+ mi_y = mi_row << MI_SIZE_LOG2;
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ struct macroblockd_plane *pd = &xd->plane[j];
+ struct buf_2d *const dst_buf = &pd->dst;
+ bw = (mi_step * 8) >> pd->subsampling_x;
+ bh = VPXMAX((num_4x4_blocks_high_lookup[bsize] * 2) >> pd->subsampling_y,
+ 4);
+
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
+ struct buf_2d *const pre_buf = &pd->pre[ref];
+ const int idx = xd->block_refs[ref]->idx;
+ BufferPool *const pool = pbi->common.buffer_pool;
+ RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx];
+ const int is_scaled = vp10_is_scaled(sf);
+
+ if (sb_type < BLOCK_8X8) {
+ const PARTITION_TYPE bp = BLOCK_8X8 - sb_type;
+ const int have_vsplit = bp != PARTITION_HORZ;
+ const int have_hsplit = bp != PARTITION_VERT;
+ const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
+ const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
+ const int pw = 8 >> (have_vsplit | pd->subsampling_x);
+ int x, y;
+
+ for (y = 0; y < num_4x4_h; ++y)
+ for (x = 0; x < num_4x4_w; ++x) {
+ const MV mv = average_split_mvs(pd, mi, ref, y * 2 + x);
+ if ((bp == PARTITION_HORZ || bp == PARTITION_SPLIT)
+ && y == 0 && !pd->subsampling_y)
+ continue;
+
+ dec_build_inter_predictors(pbi, xd, j, bw, bh,
+ 4 * x, 0, pw, bh, mi_x, mi_y,
+ interp_filter, sf, pre_buf, dst_buf,
+ &mv, ref_frame_buf, is_scaled, ref);
+ }
+ } else {
+ const MV mv = mi->mbmi.mv[ref].as_mv;
+ dec_build_inter_predictors(pbi, xd, j, bw, bh,
+ 0, 0, bw, bh, mi_x, mi_y, interp_filter,
+ sf, pre_buf, dst_buf, &mv, ref_frame_buf,
+ is_scaled, ref);
+ }
+ }
+ }
+ }
+ xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
+}
+
+static void dec_build_prediction_by_left_preds(VP10Decoder *const pbi,
+ MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ uint8_t *tmp_buf[MAX_MB_PLANE],
+ int tmp_stride[MAX_MB_PLANE]) {
+ VP10_COMMON *const cm = &pbi->common;
+ const TileInfo *const tile = &xd->tile;
+ BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ int i, j, mi_step, ref;
+
+ if (mi_col == 0 || (mi_col - 1 < tile->mi_col_start) ||
+ (mi_col - 1) >= tile->mi_col_end)
+ return;
+
+ for (i = 0; i < VPXMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) {
+ int mi_row_offset = i;
+ int mi_col_offset = -1;
+ int mi_x, mi_y, bw, bh;
+ const MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * cm->mi_stride];
+ const MB_MODE_INFO *mbmi = &mi->mbmi;
+ const BLOCK_SIZE sb_type = mbmi->sb_type;
+ const int is_compound = has_second_ref(mbmi);
+ const INTERP_FILTER interp_filter = mbmi->interp_filter;
+
+ mi_step = VPXMIN(xd->n8_h, num_8x8_blocks_high_lookup[sb_type]);
+
+ if (!is_inter_block(mbmi))
+ continue;
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ struct macroblockd_plane *const pd = &xd->plane[j];
+ setup_pred_plane(&pd->dst,
+ tmp_buf[j], tmp_stride[j],
+ i, 0, NULL,
+ pd->subsampling_x, pd->subsampling_y);
+ }
+
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
+ RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
+
+ xd->block_refs[ref] = ref_buf;
+      if (!vp10_is_valid_scale(&ref_buf->sf))
+ vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
+ "Reference frame has invalid dimensions");
+ vp10_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + i, mi_col,
+ &ref_buf->sf);
+ }
+
+ xd->mb_to_top_edge = -(((mi_row + i) * MI_SIZE) * 8);
+ mi_x = mi_col << MI_SIZE_LOG2;
+ mi_y = (mi_row + i) << MI_SIZE_LOG2;
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ struct macroblockd_plane *pd = &xd->plane[j];
+ struct buf_2d *const dst_buf = &pd->dst;
+ bw = VPXMAX((num_4x4_blocks_wide_lookup[bsize] * 2) >> pd->subsampling_x,
+ 4);
+ bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
+
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
+ struct buf_2d *const pre_buf = &pd->pre[ref];
+ const int idx = xd->block_refs[ref]->idx;
+ BufferPool *const pool = pbi->common.buffer_pool;
+ RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx];
+ const int is_scaled = vp10_is_scaled(sf);
+
+ if (sb_type < BLOCK_8X8) {
+ const PARTITION_TYPE bp = BLOCK_8X8 - sb_type;
+ const int have_vsplit = bp != PARTITION_HORZ;
+ const int have_hsplit = bp != PARTITION_VERT;
+ const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
+ const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
+ const int ph = 8 >> (have_hsplit | pd->subsampling_y);
+ int x, y;
+
+ for (y = 0; y < num_4x4_h; ++y)
+ for (x = 0; x < num_4x4_w; ++x) {
+ const MV mv = average_split_mvs(pd, mi, ref, y * 2 + x);
+ if ((bp == PARTITION_VERT || bp == PARTITION_SPLIT)
+ && x == 0 && !pd->subsampling_x)
+ continue;
+
+ dec_build_inter_predictors(pbi, xd, j, bw, bh,
+ 0, 4 * y, bw, ph, mi_x, mi_y,
+ interp_filter, sf, pre_buf, dst_buf,
+ &mv, ref_frame_buf, is_scaled, ref);
+ }
+ } else {
+ const MV mv = mi->mbmi.mv[ref].as_mv;
+ dec_build_inter_predictors(pbi, xd, j, bw, bh,
+ 0, 0, bw, bh, mi_x, mi_y, interp_filter,
+ sf, pre_buf, dst_buf, &mv, ref_frame_buf,
+ is_scaled, ref);
+ }
+ }
+ }
+ }
+ xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
+}
+#endif // CONFIG_OBMC
+
static INLINE TX_SIZE dec_get_uv_tx_size(const MB_MODE_INFO *mbmi,
int n4_wl, int n4_hl) {
// get minimum log2 num4x4s dimension
} else {
// Prediction
dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col);
+#if CONFIG_OBMC
+ if (mbmi->obmc) {
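+      // OBMC reconstruction: build the above/left neighbors' predictions
+      // into the scratch buffers, then blend them in place into the
+      // prediction that dec_build_inter_predictors_sb() just wrote.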
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * 64 * 64]);
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * 64 * 64]);
+#else
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * 64 * 64]);
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * 64 * 64]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
+ int dst_stride1[MAX_MB_PLANE] = {64, 64, 64};
+ int dst_stride2[MAX_MB_PLANE] = {64, 64, 64};
+
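+      // Each plane uses a 64x64 region (stride 64) of the scratch buffers;
+      // offsets 4096 (= 64 * 64) and 8192 select planes 1 and 2.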
+ assert(mbmi->sb_type >= BLOCK_8X8);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ int len = sizeof(uint16_t);
+ dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
+ dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + 4096 * len);
+ dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 8192 * len);
+ dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
+ dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + 4096 * len);
+ dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 8192 * len);
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ dst_buf1[0] = tmp_buf1;
+ dst_buf1[1] = tmp_buf1 + 4096;
+ dst_buf1[2] = tmp_buf1 + 8192;
+ dst_buf2[0] = tmp_buf2;
+ dst_buf2[1] = tmp_buf2 + 4096;
+ dst_buf2[2] = tmp_buf2 + 8192;
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ dec_build_prediction_by_above_preds(pbi, xd, mi_row, mi_col,
+ dst_buf1, dst_stride1);
+ dec_build_prediction_by_left_preds(pbi, xd, mi_row, mi_col,
+ dst_buf2, dst_stride2);
+ vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm),
+ mi_row, mi_col);
+ vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 0, NULL, NULL,
+ dst_buf1, dst_stride1,
+ dst_buf2, dst_stride2);
+ }
+#endif // CONFIG_OBMC
// Reconstruction
if (!mbmi->skip) {
read_inter_compound_mode_probs(fc, &r);
#endif // CONFIG_EXT_INTER
+#if CONFIG_OBMC
+ for (i = BLOCK_8X8; i < BLOCK_SIZES; ++i)
+ vp10_diff_update_prob(&r, &fc->obmc_prob[i]);
+#endif // CONFIG_OBMC
+
if (cm->interp_filter == SWITCHABLE)
read_switchable_interp_probs(fc, &r);
zero_counts.inter_compound_mode,
sizeof(cm->counts.inter_compound_mode)));
#endif // CONFIG_EXT_INTER
+#if CONFIG_OBMC
+ assert(!memcmp(cm->counts.obmc, zero_counts.obmc,
+ sizeof(cm->counts.obmc)));
+#endif // CONFIG_OBMC
assert(!memcmp(cm->counts.intra_inter, zero_counts.intra_inter,
sizeof(cm->counts.intra_inter)));
assert(!memcmp(cm->counts.comp_inter, zero_counts.comp_inter,
}
+#if CONFIG_OBMC
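+// Reads the OBMC flag for the current block; returns 0 when OBMC is not
+// allowed for this block size, in which case no bit is consumed.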
+static int read_is_obmc_block(VP10_COMMON *const cm, MACROBLOCKD *const xd,
+ vpx_reader *r) {
+ BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ FRAME_COUNTS *counts = xd->counts;
+ int is_obmc;
+
+ if (is_obmc_allowed(&xd->mi[0]->mbmi)) {
+ is_obmc = vpx_read(r, cm->fc->obmc_prob[bsize]);
+ if (counts)
+ ++counts->obmc[bsize][is_obmc];
+ return is_obmc;
+ } else {
+ return 0;
+ }
+}
+#endif // CONFIG_OBMC
+
static INLINE INTERP_FILTER read_switchable_interp_filter(
VP10_COMMON *const cm, MACROBLOCKD *const xd,
vpx_reader *r) {
static void read_inter_block_mode_info(VP10Decoder *const pbi,
MACROBLOCKD *const xd,
MODE_INFO *const mi,
+#if CONFIG_OBMC && CONFIG_SUPERTX
+ int mi_row, int mi_col, vpx_reader *r,
+ int supertx_enabled) {
+#else
int mi_row, int mi_col, vpx_reader *r) {
+#endif // CONFIG_OBMC && CONFIG_SUPERTX
VP10_COMMON *const cm = &pbi->common;
MB_MODE_INFO *const mbmi = &mi->mbmi;
const BLOCK_SIZE bsize = mbmi->sb_type;
mi_row, mi_col, fpm_sync, (void *)pbi, inter_mode_ctx);
}
+#if CONFIG_OBMC
+#if CONFIG_SUPERTX
+ if (!supertx_enabled)
+#endif // CONFIG_SUPERTX
+ mbmi->obmc = read_is_obmc_block(cm, xd, r);
+#endif // CONFIG_OBMC
+
#if CONFIG_REF_MV
#if CONFIG_EXT_INTER
if (is_compound)
xd->mi[0]->mbmi.tx_size = xd->supertx_size;
for (idy = 0; idy < height; ++idy)
for (idx = 0; idx < width; ++idx)
- xd->mi[0]->mbmi.inter_tx_size[(idy >> 1) * 8 + (idx >> 1)] = xd->supertx_size;
+ xd->mi[0]->mbmi.inter_tx_size[(idy >> 1) * 8 + (idx >> 1)] =
+ xd->supertx_size;
}
#endif // CONFIG_VAR_TX
#endif // CONFIG_SUPERTX
if (inter_block)
read_inter_block_mode_info(pbi, xd,
+#if CONFIG_OBMC && CONFIG_SUPERTX
+ mi, mi_row, mi_col, r, supertx_enabled);
+#else
mi, mi_row, mi_col, r);
+#endif // CONFIG_OBMC && CONFIG_SUPERTX
else
read_intra_block_mode_info(cm, xd, mi, r);
} else {
int16_t mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]];
write_ref_frames(cm, xd, w);
+#if CONFIG_OBMC
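+    // Mirror the decoder: the OBMC flag is coded only when allowed for this
+    // block size (and, with supertx, only outside supertx blocks).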
+#if CONFIG_SUPERTX
+ if (!supertx_enabled)
+#endif // CONFIG_SUPERTX
+ if (is_obmc_allowed(mbmi))
+ vpx_write(w, mbmi->obmc, cm->fc->obmc_prob[bsize]);
+#endif // CONFIG_OBMC
#if CONFIG_REF_MV
#if CONFIG_EXT_INTER
update_inter_compound_mode_probs(cm, &header_bc);
#endif // CONFIG_EXT_INTER
+#if CONFIG_OBMC
+ for (i = BLOCK_8X8; i < BLOCK_SIZES; ++i)
+ vp10_cond_prob_diff_update(&header_bc, &fc->obmc_prob[i],
+ counts->obmc[i]);
+#endif // CONFIG_OBMC
+
if (cm->interp_filter == SWITCHABLE)
update_switchable_interp_probs(cm, &header_bc, counts);
[ref0 != GOLDEN_FRAME]++;
#endif // CONFIG_EXT_REFS
}
+#if CONFIG_OBMC
+#if CONFIG_SUPERTX
+ if (!supertx_enabled)
+#endif // CONFIG_SUPERTX
+ if (is_obmc_allowed(mbmi))
+ counts->obmc[mbmi->sb_type][mbmi->obmc]++;
+#endif // CONFIG_OBMC
}
}
if (inter_block &&
vp10_build_inter_predictors_sbuv(xd, mi_row, mi_col,
VPXMAX(bsize, BLOCK_8X8));
+#if CONFIG_OBMC
+ if (mbmi->obmc) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * 64 * 64]);
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * 64 * 64]);
+#else
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * 64 * 64]);
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * 64 * 64]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
+ int dst_stride1[MAX_MB_PLANE] = {64, 64, 64};
+ int dst_stride2[MAX_MB_PLANE] = {64, 64, 64};
+
+ assert(mbmi->sb_type >= BLOCK_8X8);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ int len = sizeof(uint16_t);
+ dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
+ dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + 4096 * len);
+ dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 8192 * len);
+ dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
+ dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + 4096 * len);
+ dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 8192 * len);
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ dst_buf1[0] = tmp_buf1;
+ dst_buf1[1] = tmp_buf1 + 4096;
+ dst_buf1[2] = tmp_buf1 + 8192;
+ dst_buf2[0] = tmp_buf2;
+ dst_buf2[1] = tmp_buf2 + 4096;
+ dst_buf2[2] = tmp_buf2 + 8192;
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ vp10_build_prediction_by_above_preds(cpi, xd, mi_row, mi_col, dst_buf1,
+ dst_stride1);
+ vp10_build_prediction_by_left_preds(cpi, xd, mi_row, mi_col, dst_buf2,
+ dst_stride2);
+ vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm),
+ mi_row, mi_col);
+ vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 0, NULL, NULL,
+ dst_buf1, dst_stride1,
+ dst_buf2, dst_stride2);
+ }
+
+#endif // CONFIG_OBMC
+
vp10_encode_sb(x, VPXMAX(bsize, BLOCK_8X8));
#if CONFIG_VAR_TX
vp10_tokenize_sb_inter(cpi, td, t, !output_enabled,
unsigned int inter_compound_mode_cost[INTER_MODE_CONTEXTS]
[INTER_COMPOUND_MODES];
#endif // CONFIG_EXT_INTER
+#if CONFIG_OBMC
+ int obmc_cost[BLOCK_SIZES][2];
+#endif // CONFIG_OBMC
int intra_uv_mode_cost[INTRA_MODES][INTRA_MODES];
int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
cm->fc->inter_compound_mode_probs[i],
vp10_inter_compound_mode_tree);
#endif // CONFIG_EXT_INTER
+#if CONFIG_OBMC
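+    // Per-size bit costs for signaling the OBMC flag, derived from the
+    // current frame-level probabilities.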
+ for (i = BLOCK_8X8; i < BLOCK_SIZES; i++) {
+ cpi->obmc_cost[i][0] = vp10_cost_bit(cm->fc->obmc_prob[i], 0);
+ cpi->obmc_cost[i][1] = vp10_cost_bit(cm->fc->obmc_prob[i], 1);
+ }
+#endif // CONFIG_OBMC
}
}
xd->mb_to_top_edge - LEFT_TOP_MARGIN,
xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
}
-
static INTERP_FILTER predict_interp_filter(const VP10_COMP *cpi,
const MACROBLOCK *x,
const BLOCK_SIZE bsize,
(*single_filter)[MAX_REF_FRAMES]
) {
INTERP_FILTER best_filter = SWITCHABLE;
-
const VP10_COMMON *cm = &cpi->common;
const MACROBLOCKD *xd = &x->e_mbd;
int bsl = mi_width_log2_lookup[bsize];
int pred_filter_search = cpi->sf.cb_pred_filter_search ?
(((mi_row + mi_col) >> bsl) +
- get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
+ get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
const int is_comp_pred = has_second_ref(mbmi);
const int this_mode = mbmi->mode;
int refs[2] = { mbmi->ref_frame[0],
- (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
-
+ (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
if (pred_filter_search) {
INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
if (xd->up_available)
int *disable_skip,
int_mv (*mode_mv)[MAX_REF_FRAMES],
int mi_row, int mi_col,
+#if CONFIG_OBMC
+ uint8_t *dst_buf1[3],
+ int dst_stride1[3],
+ uint8_t *dst_buf2[3],
+ int dst_stride2[3],
+#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
int_mv single_newmvs[2][MAX_REF_FRAMES],
#else
#else
DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * 64 * 64]);
#endif // CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_OBMC
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint16_t, tmp_buf1_16[MAX_MB_PLANE * 64 * 64]);
+ uint8_t *tmp_buf1;
+ uint8_t *obmc_tmp_buf[3];
+#else
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * 64 * 64]);
+ uint8_t *obmc_tmp_buf[3] = {tmp_buf1, tmp_buf1 + 4096, tmp_buf1 + 8192};
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ int obmc_tmp_stride[3] = {64, 64, 64};
+ int best_obmc_flag = 0;
+ uint8_t tmp_skip_txfm[MAX_MB_PLANE << 2] = {0};
+ int64_t tmp_bsse[MAX_MB_PLANE << 2] = {0};
+ int64_t rdobmc;
+ int skip_txfm_sb_obmc = 0;
+ int64_t skip_sse_sb_obmc = INT64_MAX;
+ int allow_obmc = is_obmc_allowed(mbmi);
+#endif // CONFIG_OBMC
int pred_exists = 0;
int intpel_mv;
int64_t rd, tmp_rd, best_rd = INT64_MAX;
int64_t distortion_y = 0, distortion_uv = 0;
int16_t mode_ctx = mbmi_ext->mode_context[refs[0]];
+#if CONFIG_OBMC
+ tmp_rd = 0;
+#endif // CONFIG_OBMC
#if CONFIG_REF_MV
#if CONFIG_EXT_INTER
if (is_comp_pred)
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16);
+#if CONFIG_OBMC
+ tmp_buf1 = CONVERT_TO_BYTEPTR(tmp_buf1_16);
+#endif // CONFIG_OBMC
} else {
tmp_buf = (uint8_t *)tmp_buf16;
- }
+#if CONFIG_OBMC
+ tmp_buf1 = (uint8_t *)tmp_buf1_16;
+#endif // CONFIG_OBMC
+ }
+#if CONFIG_OBMC
+ obmc_tmp_buf[0] = tmp_buf1;
+ obmc_tmp_buf[1] = tmp_buf1 + 4096;
+ obmc_tmp_buf[2] = tmp_buf1 + 8192;
+#endif // CONFIG_OBMC
#endif // CONFIG_VP9_HIGHBITDEPTH
if (is_comp_pred) {
if (this_mode == NEARMV && is_comp_pred) {
uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
- int ref_mv_idx = mbmi->ref_mv_idx + 1;
- cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
- cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
+ cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][1].this_mv;
+ cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][1].comp_mv;
for (i = 0; i < 2; ++i) {
lower_mv_precision(&cur_mv[i].as_mv, cm->allow_high_precision_mv);
int64_t rs_rd;
int tmp_skip_sb = 0;
int64_t tmp_skip_sse = INT64_MAX;
+#if CONFIG_OBMC
+ int obmc_flag = 0;
+ int tmp_skip_sb_obmc = 0;
+ int64_t tmp_skip_sse_obmc = INT64_MAX;
+#endif // CONFIG_OBMC
mbmi->interp_filter = i;
rs = vp10_get_switchable_rate(cpi, xd);
VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
if (cm->interp_filter == SWITCHABLE)
rd += rs_rd;
+#if CONFIG_OBMC
+ if (allow_obmc) {
+ obmc_flag = best_obmc_flag;
+ rd += RDCOST(x->rdmult, x->rddiv,
+ cpi->obmc_cost[bsize][obmc_flag], 0);
+ }
+#endif // CONFIG_OBMC
*mask_filter = VPXMAX(*mask_filter, rd);
} else {
int rate_sum = 0;
int64_t dist_sum = 0;
+#if CONFIG_OBMC
+ int rate_sum_obmc = 0;
+ int64_t dist_sum_obmc = 0;
+#endif // CONFIG_OBMC
if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
(cpi->sf.interp_filter_search_mask & (1 << i))) {
rate_sum = INT_MAX;
&tmp_skip_sb, &tmp_skip_sse);
rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
+#if CONFIG_OBMC
+ if (allow_obmc) {
+ rd += RDCOST(x->rdmult, x->rddiv, cpi->obmc_cost[bsize][0], 0);
+ memcpy(tmp_skip_txfm, x->skip_txfm, sizeof(tmp_skip_txfm));
+ memcpy(tmp_bsse, x->bsse, sizeof(tmp_bsse));
+
+ vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 1,
+ obmc_tmp_buf, obmc_tmp_stride,
+ dst_buf1, dst_stride1,
+ dst_buf2, dst_stride2);
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ xd->plane[j].dst.buf = obmc_tmp_buf[j];
+ xd->plane[j].dst.stride = obmc_tmp_stride[j];
+ }
+ model_rd_for_sb(cpi, bsize, x, xd, &rate_sum_obmc, &dist_sum_obmc,
+ &tmp_skip_sb_obmc, &tmp_skip_sse_obmc);
+ rdobmc = RDCOST(x->rdmult, x->rddiv,
+ rate_sum_obmc + cpi->obmc_cost[bsize][1],
+ dist_sum_obmc);
+
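+          // Prefer the OBMC prediction only if it beats the regular
+          // prediction's rd cost by at least 1%.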
+ if ((double)rdobmc <= 0.99 * (double)rd) {
+ obmc_flag = 1;
+ rd = rdobmc;
+ rate_sum = rate_sum_obmc;
+ dist_sum = dist_sum_obmc;
+ tmp_skip_sb = tmp_skip_sb_obmc;
+ tmp_skip_sse = tmp_skip_sse_obmc;
+ } else {
+ obmc_flag = 0;
+ memcpy(x->skip_txfm, tmp_skip_txfm, sizeof(tmp_skip_txfm));
+ memcpy(x->bsse, tmp_bsse, sizeof(tmp_bsse));
+ }
+ }
+#endif // CONFIG_OBMC
filter_cache[i] = rd;
filter_cache[SWITCHABLE_FILTERS] =
VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
if (newbest) {
best_rd = rd;
best_filter = mbmi->interp_filter;
+#if CONFIG_OBMC
+ if (allow_obmc)
+ best_obmc_flag = obmc_flag;
+#endif // CONFIG_OBMC
if (cm->interp_filter == SWITCHABLE && i &&
!(intpel_mv && IsInterpolatingFilter(i)))
best_needs_copy = !best_needs_copy;
mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
cm->interp_filter : best_filter;
rs = cm->interp_filter == SWITCHABLE ? vp10_get_switchable_rate(cpi, xd) : 0;
+#if CONFIG_OBMC
+ if (allow_obmc)
+ mbmi->obmc = best_obmc_flag;
+ else
+ mbmi->obmc = 0;
+#endif // CONFIG_OBMC
+#if CONFIG_OBMC
+ if (pred_exists && !mbmi->obmc) {
+#else
if (pred_exists) {
+#endif // CONFIG_OBMC
if (best_needs_copy) {
// again temporarily set the buffers to local memory to prevent a memcpy
for (i = 0; i < MAX_MB_PLANE; i++) {
}
}
rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0);
+#if CONFIG_OBMC
+ if (allow_obmc)
+ rd += RDCOST(x->rdmult, x->rddiv,
+ cpi->obmc_cost[bsize][mbmi->obmc], 0);
+#endif // CONFIG_OBMC
} else {
int tmp_rate;
int64_t tmp_dist;
+#if CONFIG_OBMC
+ int tmp_rate_obmc;
+ int64_t tmp_dist_obmc;
+#endif // CONFIG_OBMC
// Handles the special case when a filter that is not in the
// switchable list (ex. bilinear) is indicated at the frame level, or
// skip condition holds.
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
+#if CONFIG_OBMC
+ if (mbmi->obmc) {
+ vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 1,
+ obmc_tmp_buf, obmc_tmp_stride,
+ dst_buf1, dst_stride1,
+ dst_buf2, dst_stride2);
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ xd->plane[i].dst.buf = obmc_tmp_buf[i];
+ xd->plane[i].dst.stride = obmc_tmp_stride[i];
+ }
+ model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist,
+ &skip_txfm_sb, &skip_sse_sb);
+ rd = RDCOST(x->rdmult, x->rddiv,
+ rs + tmp_rate + cpi->obmc_cost[bsize][1],
+ tmp_dist);
+ } else {
+#endif // CONFIG_OBMC
model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist,
&skip_txfm_sb, &skip_sse_sb);
rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
+#if CONFIG_OBMC
+ if (allow_obmc) {
+ rd += RDCOST(x->rdmult, x->rddiv, cpi->obmc_cost[bsize][0], 0);
+ memcpy(tmp_skip_txfm, x->skip_txfm, sizeof(tmp_skip_txfm));
+ memcpy(tmp_bsse, x->bsse, sizeof(tmp_bsse));
+
+ vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 1,
+ obmc_tmp_buf, obmc_tmp_stride,
+ dst_buf1, dst_stride1,
+ dst_buf2, dst_stride2);
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ xd->plane[i].dst.buf = obmc_tmp_buf[i];
+ xd->plane[i].dst.stride = obmc_tmp_stride[i];
+ }
+ model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate_obmc, &tmp_dist_obmc,
+ &skip_txfm_sb_obmc, &skip_sse_sb_obmc);
+ rdobmc = RDCOST(x->rdmult, x->rddiv,
+ rs + tmp_rate_obmc + cpi->obmc_cost[bsize][1],
+ tmp_dist_obmc);
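+        // As above: switch to OBMC only on at least a 1% rd improvement.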
+ if ((double)rdobmc <= 0.99 * (double)rd) {
+ mbmi->obmc = 1;
+ rd = rdobmc;
+ skip_txfm_sb = skip_txfm_sb_obmc;
+ skip_sse_sb = skip_sse_sb_obmc;
+ } else {
+ mbmi->obmc = 0;
+ memcpy(x->skip_txfm, tmp_skip_txfm, sizeof(tmp_skip_txfm));
+ memcpy(x->bsse, tmp_bsse, sizeof(tmp_bsse));
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
+ }
+ } else {
+ mbmi->obmc = 0;
+ }
+ }
+#endif // CONFIG_OBMC
memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
memcpy(bsse, x->bsse, sizeof(bsse));
}
if (cm->interp_filter == SWITCHABLE)
*rate2 += rs;
+#if CONFIG_OBMC
+ if (allow_obmc)
+ *rate2 += cpi->obmc_cost[bsize][mbmi->obmc];
+#endif // CONFIG_OBMC
memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
memcpy(x->bsse, bsse, sizeof(bsse));
int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
+#if CONFIG_OBMC
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * 64 * 64]);
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * 64 * 64]);
+#else
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * 64 * 64]);
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * 64 * 64]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ uint8_t *dst_buf1[3], *dst_buf2[3];
+ int dst_stride1[3] = {64, 64, 64};
+ int dst_stride2[3] = {64, 64, 64};
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ int len = sizeof(uint16_t);
+ dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
+ dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + 4096 * len);
+ dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 8192 * len);
+ dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
+ dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + 4096 * len);
+ dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 8192 * len);
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ dst_buf1[0] = tmp_buf1;
+ dst_buf1[1] = tmp_buf1 + 4096;
+ dst_buf1[2] = tmp_buf1 + 8192;
+ dst_buf2[0] = tmp_buf2;
+ dst_buf2[1] = tmp_buf2 + 4096;
+ dst_buf2[2] = tmp_buf2 + 8192;
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_OBMC
vp10_zero(best_mbmode);
}
#endif
+#if CONFIG_OBMC
+ vp10_build_prediction_by_above_preds(cpi, xd, mi_row, mi_col, dst_buf1,
+ dst_stride1);
+ vp10_build_prediction_by_left_preds(cpi, xd, mi_row, mi_col, dst_buf2,
+ dst_stride2);
+ vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
+#endif // CONFIG_OBMC
+
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
// Skip checking missing references in both single and compound reference
mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
: cm->interp_filter;
mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
+#if CONFIG_OBMC
+ mbmi->obmc = 0;
+#endif // CONFIG_OBMC
x->skip = 0;
set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
&rate_y, &rate_uv,
&disable_skip, frame_mv,
mi_row, mi_col,
+#if CONFIG_OBMC
+ dst_buf1, dst_stride1,
+ dst_buf2, dst_stride2,
+#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
single_newmvs,
#else
&tmp_rate_y, &tmp_rate_uv,
&dummy_disable_skip, frame_mv,
mi_row, mi_col,
+#if CONFIG_OBMC
+ dst_buf1, dst_stride1,
+ dst_buf2, dst_stride2,
+#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
dummy_single_newmvs,
#else
}
*returnrate_nocoef -= vp10_cost_bit(vp10_get_intra_inter_prob(cm, xd),
mbmi->ref_frame[0] != INTRA_FRAME);
+#if CONFIG_OBMC
+ if (is_inter_block(mbmi) && is_obmc_allowed(mbmi))
+ *returnrate_nocoef -= cpi->obmc_cost[bsize][mbmi->obmc];
+#endif // CONFIG_OBMC
#endif // CONFIG_SUPERTX
rd_cost->dist = distortion2;
rd_cost->rdcost = this_rd;
mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
#endif // CONFIG_EXT_INTRA
+#if CONFIG_OBMC
+ mbmi->obmc = 0;
+#endif // CONFIG_OBMC
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
filter_cache[i] = INT64_MAX;
store_coding_context(x, ctx, best_ref_index,
best_pred_diff, best_filter_diff, 0);
}
+
+#if CONFIG_OBMC
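+// Encoder-side counterparts of the decoder's prediction-by-neighbors
+// helpers: re-run motion compensation for the above-row (respectively
+// left-column) neighbors into tmp_buf so the result can be blended by
+// vp10_build_obmc_inter_prediction().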
+void vp10_build_prediction_by_above_preds(VP10_COMP *cpi,
+ MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ uint8_t *tmp_buf[MAX_MB_PLANE],
+ int tmp_stride[MAX_MB_PLANE]) {
+ VP10_COMMON *const cm = &cpi->common;
+ BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ int i, j, mi_step, ref;
+
+ if (mi_row == 0)
+ return;
+
+ for (i = 0; i < VPXMIN(xd->n8_w, cm->mi_cols - mi_col); i += mi_step) {
+ int mi_row_offset = -1;
+ int mi_col_offset = i;
+ int mi_x, mi_y, bw, bh;
+ MODE_INFO *above_mi = xd->mi[mi_col_offset +
+ mi_row_offset * xd->mi_stride];
+ MB_MODE_INFO *above_mbmi = &above_mi->mbmi;
+
+ mi_step = VPXMIN(xd->n8_w,
+ num_8x8_blocks_wide_lookup[above_mbmi->sb_type]);
+
+ if (!is_inter_block(above_mbmi))
+ continue;
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ struct macroblockd_plane *const pd = &xd->plane[j];
+ setup_pred_plane(&pd->dst,
+ tmp_buf[j], tmp_stride[j],
+ 0, i, NULL,
+ pd->subsampling_x, pd->subsampling_y);
+ }
+ set_ref_ptrs(cm, xd, above_mbmi->ref_frame[0], above_mbmi->ref_frame[1]);
+ for (ref = 0; ref < 1 + has_second_ref(above_mbmi); ++ref) {
+ YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi,
+ above_mbmi->ref_frame[ref]);
+ assert(cfg != NULL);
+ vp10_setup_pre_planes(xd, ref, cfg, mi_row, mi_col + i,
+ &xd->block_refs[ref]->sf);
+ }
+
+ xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8);
+ mi_x = (mi_col + i) << MI_SIZE_LOG2;
+ mi_y = mi_row << MI_SIZE_LOG2;
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ const struct macroblockd_plane *pd = &xd->plane[j];
+ bw = (mi_step * 8) >> pd->subsampling_x;
+ bh = VPXMAX((num_4x4_blocks_high_lookup[bsize] * 2) >> pd->subsampling_y,
+ 4);
+
+ if (above_mbmi->sb_type < BLOCK_8X8) {
+ const PARTITION_TYPE bp = BLOCK_8X8 - above_mbmi->sb_type;
+ const int have_vsplit = bp != PARTITION_HORZ;
+ const int have_hsplit = bp != PARTITION_VERT;
+ const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
+ const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
+ const int pw = 8 >> (have_vsplit | pd->subsampling_x);
+ int x, y;
+
+ for (y = 0; y < num_4x4_h; ++y)
+ for (x = 0; x < num_4x4_w; ++x) {
+ if ((bp == PARTITION_HORZ || bp == PARTITION_SPLIT)
+ && y == 0 && !pd->subsampling_y)
+ continue;
+
+ build_inter_predictors(xd, j, mi_col_offset, mi_row_offset,
+ y * 2 + x, bw, bh,
+ 4 * x, 0, pw, bh, mi_x, mi_y);
+ }
+ } else {
+ build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, 0,
+ bw, bh, 0, 0, bw, bh, mi_x, mi_y);
+ }
+ }
+ }
+ xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
+}
+
+void vp10_build_prediction_by_left_preds(VP10_COMP *cpi,
+ MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ uint8_t *tmp_buf[MAX_MB_PLANE],
+ int tmp_stride[MAX_MB_PLANE]) {
+ VP10_COMMON *const cm = &cpi->common;
+ const TileInfo *const tile = &xd->tile;
+ BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ int i, j, mi_step, ref;
+
+ if (mi_col == 0 || (mi_col - 1 < tile->mi_col_start) ||
+ (mi_col - 1) >= tile->mi_col_end)
+ return;
+
+ for (i = 0; i < VPXMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) {
+ int mi_row_offset = i;
+ int mi_col_offset = -1;
+ int mi_x, mi_y, bw, bh;
+ MODE_INFO *left_mi = xd->mi[mi_col_offset +
+ mi_row_offset * xd->mi_stride];
+ MB_MODE_INFO *left_mbmi = &left_mi->mbmi;
+
+ mi_step = VPXMIN(xd->n8_h,
+ num_8x8_blocks_high_lookup[left_mbmi->sb_type]);
+
+ if (!is_inter_block(left_mbmi))
+ continue;
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ struct macroblockd_plane *const pd = &xd->plane[j];
+ setup_pred_plane(&pd->dst,
+ tmp_buf[j], tmp_stride[j],
+ i, 0, NULL,
+ pd->subsampling_x, pd->subsampling_y);
+ }
+ set_ref_ptrs(cm, xd, left_mbmi->ref_frame[0], left_mbmi->ref_frame[1]);
+ for (ref = 0; ref < 1 + has_second_ref(left_mbmi); ++ref) {
+ YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi,
+ left_mbmi->ref_frame[ref]);
+ assert(cfg != NULL);
+ vp10_setup_pre_planes(xd, ref, cfg, mi_row + i, mi_col,
+ &xd->block_refs[ref]->sf);
+ }
+
+ xd->mb_to_top_edge = -(((mi_row + i) * MI_SIZE) * 8);
+ mi_x = mi_col << MI_SIZE_LOG2;
+ mi_y = (mi_row + i) << MI_SIZE_LOG2;
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ const struct macroblockd_plane *pd = &xd->plane[j];
+ bw = VPXMAX((num_4x4_blocks_wide_lookup[bsize] * 2) >> pd->subsampling_x,
+ 4);
+ bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
+
+ if (left_mbmi->sb_type < BLOCK_8X8) {
+ const PARTITION_TYPE bp = BLOCK_8X8 - left_mbmi->sb_type;
+ const int have_vsplit = bp != PARTITION_HORZ;
+ const int have_hsplit = bp != PARTITION_VERT;
+ const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
+ const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
+ const int ph = 8 >> (have_hsplit | pd->subsampling_y);
+ int x, y;
+
+ for (y = 0; y < num_4x4_h; ++y)
+ for (x = 0; x < num_4x4_w; ++x) {
+ if ((bp == PARTITION_VERT || bp == PARTITION_SPLIT)
+ && x == 0 && !pd->subsampling_x)
+ continue;
+
+ build_inter_predictors(xd, j, mi_col_offset, mi_row_offset,
+ y * 2 + x, bw, bh,
+ 0, 4 * y, bw, ph, mi_x, mi_y);
+ }
+ } else {
+ build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, 0,
+ bw, bh, 0, 0, bw, bh, mi_x, mi_y);
+ }
+ }
+ }
+ xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
+}
+#endif // CONFIG_OBMC
int use_fast_coef_casting);
#endif // CONFIG_SUPERTX
+#if CONFIG_OBMC
+void vp10_build_prediction_by_above_preds(VP10_COMP *cpi,
+ MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ uint8_t *tmp_buf[MAX_MB_PLANE],
+ int tmp_stride[MAX_MB_PLANE]);
+void vp10_build_prediction_by_left_preds(VP10_COMP *cpi,
+ MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ uint8_t *tmp_buf[MAX_MB_PLANE],
+ int tmp_stride[MAX_MB_PLANE]);
+#endif // CONFIG_OBMC
#ifdef __cplusplus
} // extern "C"
#endif