From d1cad9c3f5dfbc4ac0f8111439402cb60837ef00 Mon Sep 17 00:00:00 2001 From: Yue Chen Date: Wed, 27 Jan 2016 14:18:53 -0800 Subject: [PATCH] Overlapped block motion compensation experiment In this experiment, an obmc inter prediction mode is enabled for >= 8X8 inter blocks. When the obmc flag is on, the regular block-based motion compensation will be refined by using predictors of the above and left blocks. Fixed some compatibility issues with vp9_highbitdepth, supertx, ref_mv, and ext_interp. Coding gain (%) on derflr/hevcmr/hevchd OBMC: 1.047/1.022/0.708 OBMC + SUPERTX: 1.652/1.616/1.137 SUPERTX: 0.862/0.779/0.630 Change-Id: I5d8d3c4729c6d3ccb03ec7034563107893103b7f --- vp10/common/blockd.h | 10 + vp10/common/entropymode.c | 15 ++ vp10/common/entropymode.h | 6 + vp10/common/reconinter.c | 290 +++++++++++++++++++++++++- vp10/common/reconinter.h | 19 +- vp10/decoder/decodeframe.c | 274 ++++++++++++++++++++++++- vp10/decoder/decodemv.c | 37 +++- vp10/encoder/bitstream.c | 13 ++ vp10/encoder/encodeframe.c | 56 +++++ vp10/encoder/encoder.h | 3 + vp10/encoder/rd.c | 6 + vp10/encoder/rdopt.c | 404 ++++++++++++++++++++++++++++++++++++- vp10/encoder/rdopt.h | 12 ++ 13 files changed, 1127 insertions(+), 18 deletions(-) diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h index 4a3e67ca7..6a69231be 100644 --- a/vp10/common/blockd.h +++ b/vp10/common/blockd.h @@ -167,6 +167,10 @@ typedef struct { INTRA_FILTER intra_filter; #endif // CONFIG_EXT_INTRA +#if CONFIG_OBMC + int8_t obmc; +#endif // CONFIG_OBMC + int_mv mv[2]; int_mv pred_mv[2]; #if CONFIG_REF_MV @@ -192,6 +196,12 @@ static INLINE int has_second_ref(const MB_MODE_INFO *mbmi) { return mbmi->ref_frame[1] > INTRA_FRAME; } +#if CONFIG_OBMC +static INLINE int is_obmc_allowed(const MB_MODE_INFO *mbmi) { + return (mbmi->sb_type >= BLOCK_8X8); +} +#endif // CONFIG_OBMC + PREDICTION_MODE vp10_left_block_mode(const MODE_INFO *cur_mi, const MODE_INFO *left_mi, int b); diff --git a/vp10/common/entropymode.c 
b/vp10/common/entropymode.c index 77f537e0d..735e10e80 100644 --- a/vp10/common/entropymode.c +++ b/vp10/common/entropymode.c @@ -228,6 +228,12 @@ static const vpx_prob default_inter_compound_mode_probs }; #endif // CONFIG_EXT_INTER +#if CONFIG_OBMC +static const vpx_prob default_obmc_prob[BLOCK_SIZES] = { + 255, 255, 255, 151, 153, 144, 178, 165, 160, 207, 195, 168, 244, +}; +#endif // CONFIG_OBMC + /* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */ const vpx_tree_index vp10_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = { -DC_PRED, 2, /* 0 = DC_NODE */ @@ -1303,6 +1309,9 @@ static void init_mode_probs(FRAME_CONTEXT *fc) { #endif // CONFIG_EXT_INTER #endif // CONFIG_REF_MV vp10_copy(fc->inter_mode_probs, default_inter_mode_probs); +#if CONFIG_OBMC + vp10_copy(fc->obmc_prob, default_obmc_prob); +#endif // CONFIG_OBMC #if CONFIG_EXT_INTER vp10_copy(fc->inter_compound_mode_probs, default_inter_compound_mode_probs); #endif // CONFIG_EXT_INTER @@ -1383,6 +1392,12 @@ void vp10_adapt_inter_frame_probs(VP10_COMMON *cm) { counts->inter_mode[i], fc->inter_mode_probs[i]); #endif +#if CONFIG_OBMC + for (i = BLOCK_8X8; i < BLOCK_SIZES; ++i) + fc->obmc_prob[i] = mode_mv_merge_probs(pre_fc->obmc_prob[i], + counts->obmc[i]); +#endif // CONFIG_OBMC + #if CONFIG_SUPERTX for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) { int j; diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h index 4b4bdf12f..d581a08cd 100644 --- a/vp10/common/entropymode.h +++ b/vp10/common/entropymode.h @@ -81,6 +81,9 @@ typedef struct frame_contexts { vpx_prob inter_compound_mode_probs[INTER_MODE_CONTEXTS] [INTER_COMPOUND_MODES - 1]; #endif // CONFIG_EXT_INTER +#if CONFIG_OBMC + vpx_prob obmc_prob[BLOCK_SIZES]; +#endif // CONFIG_OBMC vpx_prob intra_inter_prob[INTRA_INTER_CONTEXTS]; vpx_prob comp_inter_prob[COMP_INTER_CONTEXTS]; vpx_prob single_ref_prob[REF_CONTEXTS][SINGLE_REFS-1]; @@ -135,6 +138,9 @@ typedef struct FRAME_COUNTS { #if CONFIG_EXT_INTER unsigned int 
inter_compound_mode[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES]; #endif // CONFIG_EXT_INTER +#if CONFIG_OBMC + unsigned int obmc[BLOCK_SIZES][2]; +#endif // CONFIG_OBMC unsigned int intra_inter[INTRA_INTER_CONTEXTS][2]; unsigned int comp_inter[COMP_INTER_CONTEXTS][2]; unsigned int single_ref[REF_CONTEXTS][SINGLE_REFS-1][2]; diff --git a/vp10/common/reconinter.c b/vp10/common/reconinter.c index 60592fd70..efc1ee1e3 100644 --- a/vp10/common/reconinter.c +++ b/vp10/common/reconinter.c @@ -18,6 +18,9 @@ #include "vp10/common/blockd.h" #include "vp10/common/reconinter.h" #include "vp10/common/reconintra.h" +#if CONFIG_OBMC +#include "vp10/common/onyxc_int.h" +#endif // CONFIG_OBMC #if CONFIG_VP9_HIGHBITDEPTH void vp10_highbd_build_inter_predictor(const uint8_t *src, int src_stride, @@ -64,12 +67,20 @@ void vp10_build_inter_predictor(const uint8_t *src, int src_stride, sf, w, h, ref, interp_filter, sf->x_step_q4, sf->y_step_q4); } -void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, +void build_inter_predictors(MACROBLOCKD *xd, int plane, +#if CONFIG_OBMC + int mi_col_offset, int mi_row_offset, +#endif // CONFIG_OBMC + int block, int bw, int bh, int x, int y, int w, int h, int mi_x, int mi_y) { struct macroblockd_plane *const pd = &xd->plane[plane]; +#if CONFIG_OBMC + const MODE_INFO *mi = xd->mi[mi_col_offset + xd->mi_stride * mi_row_offset]; +#else const MODE_INFO *mi = xd->mi[0]; +#endif // CONFIG_OBMC const int is_compound = has_second_ref(&mi->mbmi); const INTERP_FILTER interp_filter = mi->mbmi.interp_filter; int ref; @@ -201,10 +212,18 @@ static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize, assert(pw * num_4x4_w == bw && ph * num_4x4_h == bh); for (y = 0; y < num_4x4_h; ++y) for (x = 0; x < num_4x4_w; ++x) - build_inter_predictors(xd, plane, y * 2 + x, bw, bh, + build_inter_predictors(xd, plane, +#if CONFIG_OBMC + 0, 0, +#endif // CONFIG_OBMC + y * 2 + x, bw, bh, 4 * x, 4 * y, pw, ph, mi_x, mi_y); } else { - 
build_inter_predictors(xd, plane, 0, bw, bh, + build_inter_predictors(xd, plane, +#if CONFIG_OBMC + 0, 0, +#endif // CONFIG_OBMC + 0, bw, bh, 0, 0, bw, bh, mi_x, mi_y); } } @@ -309,7 +328,6 @@ static void generate_1dmask(int length, uint8_t *mask, int plane) { } } - void vp10_build_masked_inter_predictor_complex( MACROBLOCKD *xd, uint8_t *dst, int dst_stride, uint8_t *dst2, int dst2_stride, @@ -483,9 +501,271 @@ void vp10_build_inter_predictors_sb_sub8x8(MACROBLOCKD *xd, const int bw = 4 * num_4x4_w; const int bh = 4 * num_4x4_h; - build_inter_predictors(xd, plane, block, bw, bh, + build_inter_predictors(xd, plane, +#if CONFIG_OBMC + 0, 0, +#endif // CONFIG_OBMC + block, bw, bh, 0, 0, bw, bh, mi_x, mi_y); } } #endif // CONFIG_SUPERTX + +#if CONFIG_OBMC +// obmc_mask_N[is_neighbor_predictor][overlap_position] +static const uint8_t obmc_mask_1[2][1] = { + { 55}, + { 9} +}; + +static const uint8_t obmc_mask_2[2][2] = { + { 45, 62}, + { 19, 2} +}; + +static const uint8_t obmc_mask_4[2][4] = { + { 39, 50, 59, 64}, + { 25, 14, 5, 0} +}; + +static const uint8_t obmc_mask_8[2][8] = { + { 36, 42, 48, 53, 57, 61, 63, 64}, + { 28, 22, 16, 11, 7, 3, 1, 0} +}; + +static const uint8_t obmc_mask_16[2][16] = { + { 34, 37, 40, 43, 46, 49, 52, 54, 56, 58, 60, 61, 63, 64, 64, 64}, + { 30, 27, 24, 21, 18, 15, 12, 10, 8, 6, 4, 3, 1, 0, 0, 0} +}; + +static const uint8_t obmc_mask_32[2][32] = { + { 33, 35, 36, 38, 40, 41, 43, 44, 45, 47, 48, 50, 51, 52, 53, 55, + 56, 57, 58, 59, 60, 60, 61, 62, 62, 63, 63, 64, 64, 64, 64, 64}, + { 31, 29, 28, 26, 24, 23, 21, 20, 19, 17, 16, 14, 13, 12, 11, 9, + 8, 7, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0, 0, 0, 0} +}; + +void setup_obmc_mask(int length, const uint8_t *mask[2]) { + switch (length) { + case 1: + mask[0] = obmc_mask_1[0]; + mask[1] = obmc_mask_1[1]; + break; + case 2: + mask[0] = obmc_mask_2[0]; + mask[1] = obmc_mask_2[1]; + break; + case 4: + mask[0] = obmc_mask_4[0]; + mask[1] = obmc_mask_4[1]; + break; + case 8: + mask[0] = obmc_mask_8[0]; + 
mask[1] = obmc_mask_8[1]; + break; + case 16: + mask[0] = obmc_mask_16[0]; + mask[1] = obmc_mask_16[1]; + break; + case 32: + mask[0] = obmc_mask_32[0]; + mask[1] = obmc_mask_32[1]; + break; + default: + mask[0] = obmc_mask_32[0]; + mask[1] = obmc_mask_32[1]; + assert(0); + break; + } +} + +// Blends the regular inter prediction (bmc, read from xd->plane[].dst.buf) +// with tmp_buf1 (prediction from the above neighbors' inter predictors) and +// tmp_buf2 (same, from the left neighbors) using the obmc_mask_N weights. +// Writes to final_buf if use_tmp_dst_buf is set, else in place to dst.buf. +void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, + MACROBLOCKD *xd, int mi_row, int mi_col, + int use_tmp_dst_buf, + uint8_t *final_buf[MAX_MB_PLANE], + int final_stride[MAX_MB_PLANE], + uint8_t *tmp_buf1[MAX_MB_PLANE], + int tmp_stride1[MAX_MB_PLANE], + uint8_t *tmp_buf2[MAX_MB_PLANE], + int tmp_stride2[MAX_MB_PLANE]) { + const TileInfo *const tile = &xd->tile; + BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; + int plane, i, mi_step; +#if CONFIG_VP9_HIGHBITDEPTH + int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
1 : 0; +#endif // CONFIG_VP9_HIGHBITDEPTH + + if (use_tmp_dst_buf) { + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane *pd = &xd->plane[plane]; + int bw = (xd->n8_w * 8) >> pd->subsampling_x; + int bh = (xd->n8_h * 8) >> pd->subsampling_y; + int row; +#if CONFIG_VP9_HIGHBITDEPTH + if (is_hbd) { + uint16_t *final_buf16 = CONVERT_TO_SHORTPTR(final_buf[plane]); + uint16_t *bmc_buf16 = CONVERT_TO_SHORTPTR(pd->dst.buf); + for (row = 0; row < bh; ++row) + memcpy(final_buf16 + row * final_stride[plane], + bmc_buf16 + row * pd->dst.stride, bw * sizeof(uint16_t)); + } else { +#endif + for (row = 0; row < bh; ++row) + memcpy(final_buf[plane] + row * final_stride[plane], + pd->dst.buf + row * pd->dst.stride, bw); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + } + } + + // handle above row + for (i = 0; mi_row > 0 && i < VPXMIN(xd->n8_w, cm->mi_cols - mi_col); + i += mi_step) { + int mi_row_offset = -1; + int mi_col_offset = i; + int overlap; + MODE_INFO *above_mi = xd->mi[mi_col_offset + + mi_row_offset * xd->mi_stride]; + MB_MODE_INFO *above_mbmi = &above_mi->mbmi; + + mi_step = VPXMIN(xd->n8_w, + num_8x8_blocks_wide_lookup[above_mbmi->sb_type]); + + if (!is_inter_block(above_mbmi)) + continue; + + overlap = (above_mbmi->skip) ? + num_4x4_blocks_high_lookup[bsize] << 1 : + VPXMIN(num_4x4_blocks_high_lookup[bsize], + num_4x4_blocks_high_lookup[above_mbmi->sb_type]) << 1; + + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane *pd = &xd->plane[plane]; + int bw = (mi_step * 8) >> pd->subsampling_x; + int bh = overlap >> pd->subsampling_y; + int row, col; + int dst_stride = use_tmp_dst_buf ? final_stride[plane] : pd->dst.stride; + uint8_t *dst = use_tmp_dst_buf ? 
+ &final_buf[plane][(i * 8) >> pd->subsampling_x] : + &pd->dst.buf[(i * 8) >> pd->subsampling_x]; + int bmc_stride = pd->dst.stride; + uint8_t *bmc = &pd->dst.buf[(i * 8) >> pd->subsampling_x]; + int tmp_stride = tmp_stride1[plane]; + uint8_t *tmp = &tmp_buf1[plane][(i * 8) >> pd->subsampling_x]; + const uint8_t *mask[2]; + + setup_obmc_mask(bh, mask); + +#if CONFIG_VP9_HIGHBITDEPTH + if (is_hbd) { + uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst); + uint16_t *bmc16 = CONVERT_TO_SHORTPTR(bmc); + uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp); + + for (row = 0; row < bh; ++row) { + for (col = 0; col < bw; ++col) { + dst16[col] = (mask[0][row] * bmc16[col] + mask[1][row] * tmp16[col] + + 32) >> 6; + } + dst16 += dst_stride; + bmc16 += bmc_stride; + tmp16 += tmp_stride; + } + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + for (row = 0; row < bh; ++row) { + for (col = 0; col < bw; ++col) { + dst[col] = (mask[0][row] * bmc[col] + mask[1][row] * tmp[col] + 32) + >> 6; + } + dst += dst_stride; + bmc += bmc_stride; + tmp += tmp_stride; + } +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + } + } // each mi in the above row + + if (mi_col == 0 || (mi_col - 1 < tile->mi_col_start) || + (mi_col - 1) >= tile->mi_col_end) + return; + // handle left column + for (i = 0; i < VPXMIN(xd->n8_h, cm->mi_rows - mi_row); + i += mi_step) { + int mi_row_offset = i; + int mi_col_offset = -1; + int overlap; + MODE_INFO *left_mi = xd->mi[mi_col_offset + + mi_row_offset * xd->mi_stride]; + MB_MODE_INFO *left_mbmi = &left_mi->mbmi; + + mi_step = VPXMIN(xd->n8_h, + num_8x8_blocks_high_lookup[left_mbmi->sb_type]); + + if (!is_inter_block(left_mbmi)) + continue; + + overlap = (left_mbmi->skip) ? 
num_4x4_blocks_wide_lookup[bsize] << 1 : + VPXMIN(num_4x4_blocks_wide_lookup[bsize], + num_4x4_blocks_wide_lookup[left_mbmi->sb_type]) << 1; + + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane *pd = &xd->plane[plane]; + int bw = overlap >> pd->subsampling_x; + int bh = (mi_step * 8) >> pd->subsampling_y; + int row, col; + int dst_stride = use_tmp_dst_buf ? final_stride[plane] : pd->dst.stride; + uint8_t *dst = use_tmp_dst_buf ? + &final_buf[plane][(i * 8 * dst_stride) >> pd->subsampling_y] : + &pd->dst.buf[(i * 8 * dst_stride) >> pd->subsampling_y]; + int bmc_stride = pd->dst.stride; + uint8_t *bmc = &pd->dst.buf[(i * 8 * bmc_stride) >> pd->subsampling_y]; + int tmp_stride = tmp_stride2[plane]; + uint8_t *tmp = &tmp_buf2[plane] + [(i * 8 * tmp_stride) >> pd->subsampling_y]; + const uint8_t *mask[2]; + + setup_obmc_mask(bw, mask); + +#if CONFIG_VP9_HIGHBITDEPTH + if (is_hbd) { + uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst); + uint16_t *bmc16 = CONVERT_TO_SHORTPTR(bmc); + uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp); + + for (row = 0; row < bh; ++row) { + for (col = 0; col < bw; ++col) { + dst16[col] = (mask[0][col] * bmc16[col] + mask[1][col] * tmp16[col] + + 32) >> 6; + } + dst16 += dst_stride; + bmc16 += bmc_stride; + tmp16 += tmp_stride; + } + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + for (row = 0; row < bh; ++row) { + for (col = 0; col < bw; ++col) { + dst[col] = (mask[0][col] * bmc[col] + mask[1][col] * tmp[col] + 32) + >> 6; + } + dst += dst_stride; + bmc += bmc_stride; + tmp += tmp_stride; + } +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + } + } // each mi in the left column +} +#endif // CONFIG_OBMC diff --git a/vp10/common/reconinter.h b/vp10/common/reconinter.h index 3fcdb97fe..2b36d613e 100644 --- a/vp10/common/reconinter.h +++ b/vp10/common/reconinter.h @@ -177,7 +177,11 @@ static INLINE MV average_split_mvs(const struct macroblockd_plane *pd, return res; } -void 
build_inter_predictors(MACROBLOCKD *xd, int plane, int block, +void build_inter_predictors(MACROBLOCKD *xd, int plane, +#if CONFIG_OBMC + int mi_col_offset, int mi_row_offset, +#endif // CONFIG_OBMC + int block, int bw, int bh, int x, int y, int w, int h, int mi_x, int mi_y); @@ -352,6 +356,19 @@ static INLINE int vp10_is_interp_needed(const MACROBLOCKD *const xd) { return !intpel_mv; } #endif // CONFIG_EXT_INTERP + +#if CONFIG_OBMC +void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, + MACROBLOCKD *xd, int mi_row, int mi_col, + int use_tmp_dst_buf, + uint8_t *final_buf[MAX_MB_PLANE], + int final_stride[MAX_MB_PLANE], + uint8_t *tmp_buf1[MAX_MB_PLANE], + int tmp_stride1[MAX_MB_PLANE], + uint8_t *tmp_buf2[MAX_MB_PLANE], + int tmp_stride2[MAX_MB_PLANE]); +#endif // CONFIG_OBMC + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c index 8ed9d2cd0..a003d7ac4 100644 --- a/vp10/decoder/decodeframe.c +++ b/vp10/decoder/decodeframe.c @@ -657,7 +657,7 @@ static void dec_build_inter_predictors(VP10Decoder *const pbi, MACROBLOCKD *xd, buf_stride, subpel_x, subpel_y; uint8_t *ref_frame, *buf_ptr; #if CONFIG_EXT_INTERP - const int i_filter = IsInterpolatingFilter(xd->mi[0]->mbmi.interp_filter); + const int i_filter = IsInterpolatingFilter(interp_filter); #endif // CONFIG_EXT_INTERP // Get reference frame pointer, width and height. @@ -699,6 +699,11 @@ static void dec_build_inter_predictors(VP10Decoder *const pbi, MACROBLOCKD *xd, xs = sf->x_step_q4; ys = sf->y_step_q4; } else { +#if CONFIG_OBMC + const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, mv, bw, bh, + pd->subsampling_x, + pd->subsampling_y); +#endif // CONFIG_OBMC // Co-ordinate of containing block to pixel precision. 
x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; @@ -707,8 +712,13 @@ static void dec_build_inter_predictors(VP10Decoder *const pbi, MACROBLOCKD *xd, x0_16 = x0 << SUBPEL_BITS; y0_16 = y0 << SUBPEL_BITS; +#if CONFIG_OBMC + scaled_mv.row = mv_q4.row; + scaled_mv.col = mv_q4.col; +#else scaled_mv.row = mv->row * (1 << (1 - pd->subsampling_y)); scaled_mv.col = mv->col * (1 << (1 - pd->subsampling_x)); +#endif // CONFIG_OBMC xs = ys = 16; } subpel_x = scaled_mv.col & SUBPEL_MASK; @@ -871,6 +881,7 @@ static void dec_build_inter_predictors_sb(VP10Decoder *const pbi, } } } + #if CONFIG_SUPERTX static void dec_build_inter_predictors_sb_sub8x8(VP10Decoder *const pbi, MACROBLOCKD *xd, @@ -914,7 +925,211 @@ static void dec_build_inter_predictors_sb_sub8x8(VP10Decoder *const pbi, } } } -#endif +#endif // CONFIG_SUPERTX + +#if CONFIG_OBMC +static void dec_build_prediction_by_above_preds(VP10Decoder *const pbi, + MACROBLOCKD *xd, + int mi_row, int mi_col, + uint8_t *tmp_buf[MAX_MB_PLANE], + int tmp_stride[MAX_MB_PLANE]) { + VP10_COMMON *const cm = &pbi->common; + BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; + int i, j, mi_step, ref; + + if (mi_row == 0) + return; + + for (i = 0; i < VPXMIN(xd->n8_w, cm->mi_cols - mi_col); i += mi_step) { + int mi_row_offset = -1; + int mi_col_offset = i; + int mi_x, mi_y, bw, bh; + const MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * cm->mi_stride]; + const MB_MODE_INFO *mbmi = &mi->mbmi; + const BLOCK_SIZE sb_type = mbmi->sb_type; + const int is_compound = has_second_ref(mbmi); + const INTERP_FILTER interp_filter = mbmi->interp_filter; + + mi_step = VPXMIN(xd->n8_w, num_8x8_blocks_wide_lookup[sb_type]); + + if (!is_inter_block(mbmi)) + continue; + + for (j = 0; j < MAX_MB_PLANE; ++j) { + struct macroblockd_plane *const pd = &xd->plane[j]; + setup_pred_plane(&pd->dst, + tmp_buf[j], tmp_stride[j], + 0, i, NULL, + pd->subsampling_x, pd->subsampling_y); + } + for (ref = 0; 
ref < 1 + is_compound; ++ref) { + MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref]; + RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME]; + + xd->block_refs[ref] = ref_buf; + if ((!vp10_is_valid_scale(&ref_buf->sf))) + vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM, + "Reference frame has invalid dimensions"); + vp10_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col + i, + &ref_buf->sf); + } + + xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8); + mi_x = (mi_col + i) << MI_SIZE_LOG2; + mi_y = mi_row << MI_SIZE_LOG2; + + for (j = 0; j < MAX_MB_PLANE; ++j) { + struct macroblockd_plane *pd = &xd->plane[j]; + struct buf_2d *const dst_buf = &pd->dst; + bw = (mi_step * 8) >> pd->subsampling_x; + bh = VPXMAX((num_4x4_blocks_high_lookup[bsize] * 2) >> pd->subsampling_y, + 4); + + for (ref = 0; ref < 1 + is_compound; ++ref) { + const struct scale_factors *const sf = &xd->block_refs[ref]->sf; + struct buf_2d *const pre_buf = &pd->pre[ref]; + const int idx = xd->block_refs[ref]->idx; + BufferPool *const pool = pbi->common.buffer_pool; + RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx]; + const int is_scaled = vp10_is_scaled(sf); + + if (sb_type < BLOCK_8X8) { + const PARTITION_TYPE bp = BLOCK_8X8 - sb_type; + const int have_vsplit = bp != PARTITION_HORZ; + const int have_hsplit = bp != PARTITION_VERT; + const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x); + const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y); + const int pw = 8 >> (have_vsplit | pd->subsampling_x); + int x, y; + + for (y = 0; y < num_4x4_h; ++y) + for (x = 0; x < num_4x4_w; ++x) { + const MV mv = average_split_mvs(pd, mi, ref, y * 2 + x); + if ((bp == PARTITION_HORZ || bp == PARTITION_SPLIT) + && y == 0 && !pd->subsampling_y) + continue; + + dec_build_inter_predictors(pbi, xd, j, bw, bh, + 4 * x, 0, pw, bh, mi_x, mi_y, + interp_filter, sf, pre_buf, dst_buf, + &mv, ref_frame_buf, is_scaled, ref); + } + } else { + const MV mv = 
mi->mbmi.mv[ref].as_mv; + dec_build_inter_predictors(pbi, xd, j, bw, bh, + 0, 0, bw, bh, mi_x, mi_y, interp_filter, + sf, pre_buf, dst_buf, &mv, ref_frame_buf, + is_scaled, ref); + } + } + } + } + xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8); +} + +static void dec_build_prediction_by_left_preds(VP10Decoder *const pbi, + MACROBLOCKD *xd, + int mi_row, int mi_col, + uint8_t *tmp_buf[MAX_MB_PLANE], + int tmp_stride[MAX_MB_PLANE]) { + VP10_COMMON *const cm = &pbi->common; + const TileInfo *const tile = &xd->tile; + BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; + int i, j, mi_step, ref; + + if (mi_col == 0 || (mi_col - 1 < tile->mi_col_start) || + (mi_col - 1) >= tile->mi_col_end) + return; + + for (i = 0; i < VPXMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) { + int mi_row_offset = i; + int mi_col_offset = -1; + int mi_x, mi_y, bw, bh; + const MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * cm->mi_stride]; + const MB_MODE_INFO *mbmi = &mi->mbmi; + const BLOCK_SIZE sb_type = mbmi->sb_type; + const int is_compound = has_second_ref(mbmi); + const INTERP_FILTER interp_filter = mbmi->interp_filter; + + mi_step = VPXMIN(xd->n8_h, num_8x8_blocks_high_lookup[sb_type]); + + if (!is_inter_block(mbmi)) + continue; + + for (j = 0; j < MAX_MB_PLANE; ++j) { + struct macroblockd_plane *const pd = &xd->plane[j]; + setup_pred_plane(&pd->dst, + tmp_buf[j], tmp_stride[j], + i, 0, NULL, + pd->subsampling_x, pd->subsampling_y); + } + + for (ref = 0; ref < 1 + is_compound; ++ref) { + MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref]; + RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME]; + + xd->block_refs[ref] = ref_buf; + if ((!vp10_is_valid_scale(&ref_buf->sf))) + vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM, + "Reference frame has invalid dimensions"); + vp10_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + i, mi_col, + &ref_buf->sf); + } + + xd->mb_to_top_edge = -(((mi_row + i) * MI_SIZE) * 8); + mi_x = mi_col << MI_SIZE_LOG2; + mi_y = (mi_row + i) << 
MI_SIZE_LOG2; + + for (j = 0; j < MAX_MB_PLANE; ++j) { + struct macroblockd_plane *pd = &xd->plane[j]; + struct buf_2d *const dst_buf = &pd->dst; + bw = VPXMAX((num_4x4_blocks_wide_lookup[bsize] * 2) >> pd->subsampling_x, + 4); + bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y; + + for (ref = 0; ref < 1 + is_compound; ++ref) { + const struct scale_factors *const sf = &xd->block_refs[ref]->sf; + struct buf_2d *const pre_buf = &pd->pre[ref]; + const int idx = xd->block_refs[ref]->idx; + BufferPool *const pool = pbi->common.buffer_pool; + RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx]; + const int is_scaled = vp10_is_scaled(sf); + + if (sb_type < BLOCK_8X8) { + const PARTITION_TYPE bp = BLOCK_8X8 - sb_type; + const int have_vsplit = bp != PARTITION_HORZ; + const int have_hsplit = bp != PARTITION_VERT; + const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x); + const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y); + const int ph = 8 >> (have_hsplit | pd->subsampling_y); + int x, y; + + for (y = 0; y < num_4x4_h; ++y) + for (x = 0; x < num_4x4_w; ++x) { + const MV mv = average_split_mvs(pd, mi, ref, y * 2 + x); + if ((bp == PARTITION_VERT || bp == PARTITION_SPLIT) + && x == 0 && !pd->subsampling_x) + continue; + + dec_build_inter_predictors(pbi, xd, j, bw, bh, + 0, 4 * y, bw, ph, mi_x, mi_y, + interp_filter, sf, pre_buf, dst_buf, + &mv, ref_frame_buf, is_scaled, ref); + } + } else { + const MV mv = mi->mbmi.mv[ref].as_mv; + dec_build_inter_predictors(pbi, xd, j, bw, bh, + 0, 0, bw, bh, mi_x, mi_y, interp_filter, + sf, pre_buf, dst_buf, &mv, ref_frame_buf, + is_scaled, ref); + } + } + } + } + xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8); +} +#endif // CONFIG_OBMC + static INLINE TX_SIZE dec_get_uv_tx_size(const MB_MODE_INFO *mbmi, int n4_wl, int n4_hl) { // get minimum log2 num4x4s dimension @@ -1638,6 +1853,52 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd, } else { // Prediction 
dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col); +#if CONFIG_OBMC + if (mbmi->obmc) { +#if CONFIG_VP9_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * 64 * 64]); + DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * 64 * 64]); +#else + DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * 64 * 64]); + DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * 64 * 64]); +#endif // CONFIG_VP9_HIGHBITDEPTH + uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE]; + int dst_stride1[MAX_MB_PLANE] = {64, 64, 64}; + int dst_stride2[MAX_MB_PLANE] = {64, 64, 64}; + + assert(mbmi->sb_type >= BLOCK_8X8); +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + int len = sizeof(uint16_t); + dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); + dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + 4096 * len); + dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 8192 * len); + dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); + dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + 4096 * len); + dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 8192 * len); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + dst_buf1[0] = tmp_buf1; + dst_buf1[1] = tmp_buf1 + 4096; + dst_buf1[2] = tmp_buf1 + 8192; + dst_buf2[0] = tmp_buf2; + dst_buf2[1] = tmp_buf2 + 4096; + dst_buf2[2] = tmp_buf2 + 8192; +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + dec_build_prediction_by_above_preds(pbi, xd, mi_row, mi_col, + dst_buf1, dst_stride1); + dec_build_prediction_by_left_preds(pbi, xd, mi_row, mi_col, + dst_buf2, dst_stride2); + vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), + mi_row, mi_col); + vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 0, NULL, NULL, + dst_buf1, dst_stride1, + dst_buf2, dst_stride2); + } +#endif // CONFIG_OBMC // Reconstruction if (!mbmi->skip) { @@ -3313,6 +3574,11 @@ static int read_compressed_header(VP10Decoder *pbi, const uint8_t *data, read_inter_compound_mode_probs(fc, &r); #endif // CONFIG_EXT_INTER +#if CONFIG_OBMC + 
for (i = BLOCK_8X8; i < BLOCK_SIZES; ++i) + vp10_diff_update_prob(&r, &fc->obmc_prob[i]); +#endif // CONFIG_OBMC + if (cm->interp_filter == SWITCHABLE) read_switchable_interp_probs(fc, &r); @@ -3367,6 +3633,10 @@ static void debug_check_frame_counts(const VP10_COMMON *const cm) { zero_counts.inter_compound_mode, sizeof(cm->counts.inter_compound_mode))); #endif // CONFIG_EXT_INTER +#if CONFIG_OBMC + assert(!memcmp(cm->counts.obmc, zero_counts.obmc, + sizeof(cm->counts.obmc))); +#endif // CONFIG_OBMC assert(!memcmp(cm->counts.intra_inter, zero_counts.intra_inter, sizeof(cm->counts.intra_inter))); assert(!memcmp(cm->counts.comp_inter, zero_counts.comp_inter, diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c index 156a26987..f9ed5a52f 100644 --- a/vp10/decoder/decodemv.c +++ b/vp10/decoder/decodemv.c @@ -778,6 +778,24 @@ static void read_ref_frames(VP10_COMMON *const cm, MACROBLOCKD *const xd, } +#if CONFIG_OBMC +static int read_is_obmc_block(VP10_COMMON *const cm, MACROBLOCKD *const xd, + vpx_reader *r) { + BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; + FRAME_COUNTS *counts = xd->counts; + int is_obmc; + + if (is_obmc_allowed(&xd->mi[0]->mbmi)) { + is_obmc = vpx_read(r, cm->fc->obmc_prob[bsize]); + if (counts) + ++counts->obmc[bsize][is_obmc]; + return is_obmc; + } else { + return 0; + } +} +#endif // CONFIG_OBMC + static INLINE INTERP_FILTER read_switchable_interp_filter( VP10_COMMON *const cm, MACROBLOCKD *const xd, vpx_reader *r) { @@ -1016,7 +1034,12 @@ static void fpm_sync(void *const data, int mi_row) { static void read_inter_block_mode_info(VP10Decoder *const pbi, MACROBLOCKD *const xd, MODE_INFO *const mi, +#if CONFIG_OBMC && CONFIG_SUPERTX + int mi_row, int mi_col, vpx_reader *r, + int supertx_enabled) { +#else int mi_row, int mi_col, vpx_reader *r) { +#endif // CONFIG_OBMC && CONFIG_SUPERTX VP10_COMMON *const cm = &pbi->common; MB_MODE_INFO *const mbmi = &mi->mbmi; const BLOCK_SIZE bsize = mbmi->sb_type; @@ -1062,6 +1085,13 @@ static void 
read_inter_block_mode_info(VP10Decoder *const pbi, mi_row, mi_col, fpm_sync, (void *)pbi, inter_mode_ctx); } +#if CONFIG_OBMC +#if CONFIG_SUPERTX + if (!supertx_enabled) +#endif // CONFIG_SUPERTX + mbmi->obmc = read_is_obmc_block(cm, xd, r); +#endif // CONFIG_OBMC + #if CONFIG_REF_MV #if CONFIG_EXT_INTER if (is_compound) @@ -1365,14 +1395,19 @@ static void read_inter_frame_mode_info(VP10Decoder *const pbi, xd->mi[0]->mbmi.tx_size = xd->supertx_size; for (idy = 0; idy < height; ++idy) for (idx = 0; idx < width; ++idx) - xd->mi[0]->mbmi.inter_tx_size[(idy >> 1) * 8 + (idx >> 1)] = xd->supertx_size; + xd->mi[0]->mbmi.inter_tx_size[(idy >> 1) * 8 + (idx >> 1)] = + xd->supertx_size; } #endif // CONFIG_VAR_TX #endif // CONFIG_SUPERTX if (inter_block) read_inter_block_mode_info(pbi, xd, +#if CONFIG_OBMC && CONFIG_SUPERTX + mi, mi_row, mi_col, r, supertx_enabled); +#else mi, mi_row, mi_col, r); +#endif // CONFIG_OBMC && CONFIG_SUPERTX else read_intra_block_mode_info(cm, xd, mi, r); diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c index ae8ff49e4..b786b5ca3 100644 --- a/vp10/encoder/bitstream.c +++ b/vp10/encoder/bitstream.c @@ -995,6 +995,13 @@ static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi, } else { int16_t mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]]; write_ref_frames(cm, xd, w); +#if CONFIG_OBMC +#if CONFIG_SUPERTX + if (!supertx_enabled) +#endif // CONFIG_SUPERTX + if (is_obmc_allowed(mbmi)) + vpx_write(w, mbmi->obmc, cm->fc->obmc_prob[bsize]); +#endif // CONFIG_OBMC #if CONFIG_REF_MV #if CONFIG_EXT_INTER @@ -2395,6 +2402,12 @@ static size_t write_compressed_header(VP10_COMP *cpi, uint8_t *data) { update_inter_compound_mode_probs(cm, &header_bc); #endif // CONFIG_EXT_INTER +#if CONFIG_OBMC + for (i = BLOCK_8X8; i < BLOCK_SIZES; ++i) + vp10_cond_prob_diff_update(&header_bc, &fc->obmc_prob[i], + counts->obmc[i]); +#endif // CONFIG_OBMC + if (cm->interp_filter == SWITCHABLE) update_switchable_interp_probs(cm, &header_bc, 
counts); diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c index c3ef67453..075f852c0 100644 --- a/vp10/encoder/encodeframe.c +++ b/vp10/encoder/encodeframe.c @@ -1801,6 +1801,13 @@ static void update_stats(VP10_COMMON *cm, ThreadData *td [ref0 != GOLDEN_FRAME]++; #endif // CONFIG_EXT_REFS } +#if CONFIG_OBMC +#if CONFIG_SUPERTX + if (!supertx_enabled) +#endif // CONFIG_SUPERTX + if (is_obmc_allowed(mbmi)) + counts->obmc[mbmi->sb_type][mbmi->obmc]++; +#endif // CONFIG_OBMC } } if (inter_block && @@ -4421,6 +4428,55 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td, vp10_build_inter_predictors_sbuv(xd, mi_row, mi_col, VPXMAX(bsize, BLOCK_8X8)); +#if CONFIG_OBMC + if (mbmi->obmc) { +#if CONFIG_VP9_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * 64 * 64]); + DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * 64 * 64]); +#else + DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * 64 * 64]); + DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * 64 * 64]); +#endif // CONFIG_VP9_HIGHBITDEPTH + uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE]; + int dst_stride1[MAX_MB_PLANE] = {64, 64, 64}; + int dst_stride2[MAX_MB_PLANE] = {64, 64, 64}; + + assert(mbmi->sb_type >= BLOCK_8X8); + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + int len = sizeof(uint16_t); + dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); + dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + 4096 * len); + dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 8192 * len); + dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); + dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + 4096 * len); + dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 8192 * len); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + dst_buf1[0] = tmp_buf1; + dst_buf1[1] = tmp_buf1 + 4096; + dst_buf1[2] = tmp_buf1 + 8192; + dst_buf2[0] = tmp_buf2; + dst_buf2[1] = tmp_buf2 + 4096; + dst_buf2[2] = tmp_buf2 + 8192; +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH 
+ + vp10_build_prediction_by_above_preds(cpi, xd, mi_row, mi_col, dst_buf1, + dst_stride1); + vp10_build_prediction_by_left_preds(cpi, xd, mi_row, mi_col, dst_buf2, + dst_stride2); + vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), + mi_row, mi_col); + vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 0, NULL, NULL, + dst_buf1, dst_stride1, + dst_buf2, dst_stride2); + } + +#endif // CONFIG_OBMC + vp10_encode_sb(x, VPXMAX(bsize, BLOCK_8X8)); #if CONFIG_VAR_TX vp10_tokenize_sb_inter(cpi, td, t, !output_enabled, diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h index 8faf4eded..cc2076564 100644 --- a/vp10/encoder/encoder.h +++ b/vp10/encoder/encoder.h @@ -479,6 +479,9 @@ typedef struct VP10_COMP { unsigned int inter_compound_mode_cost[INTER_MODE_CONTEXTS] [INTER_COMPOUND_MODES]; #endif // CONFIG_EXT_INTER +#if CONFIG_OBMC + int obmc_cost[BLOCK_SIZES][2]; +#endif // CONFIG_OBMC int intra_uv_mode_cost[INTRA_MODES][INTRA_MODES]; int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES]; int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS]; diff --git a/vp10/encoder/rd.c b/vp10/encoder/rd.c index 566300c68..bf73064ba 100644 --- a/vp10/encoder/rd.c +++ b/vp10/encoder/rd.c @@ -405,6 +405,12 @@ void vp10_initialize_rd_consts(VP10_COMP *cpi) { cm->fc->inter_compound_mode_probs[i], vp10_inter_compound_mode_tree); #endif // CONFIG_EXT_INTER +#if CONFIG_OBMC + for (i = BLOCK_8X8; i < BLOCK_SIZES; i++) { + cpi->obmc_cost[i][0] = vp10_cost_bit(cm->fc->obmc_prob[i], 0); + cpi->obmc_cost[i][1] = vp10_cost_bit(cm->fc->obmc_prob[i], 1); + } +#endif // CONFIG_OBMC } } diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index 060fc3806..0a94733dc 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -4932,7 +4932,6 @@ static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) { xd->mb_to_top_edge - LEFT_TOP_MARGIN, xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); } - static INTERP_FILTER predict_interp_filter(const VP10_COMP 
*cpi, const MACROBLOCK *x, const BLOCK_SIZE bsize, @@ -4942,19 +4941,17 @@ static INTERP_FILTER predict_interp_filter(const VP10_COMP *cpi, (*single_filter)[MAX_REF_FRAMES] ) { INTERP_FILTER best_filter = SWITCHABLE; - const VP10_COMMON *cm = &cpi->common; const MACROBLOCKD *xd = &x->e_mbd; int bsl = mi_width_log2_lookup[bsize]; int pred_filter_search = cpi->sf.cb_pred_filter_search ? (((mi_row + mi_col) >> bsl) + - get_chessboard_index(cm->current_video_frame)) & 0x1 : 0; + get_chessboard_index(cm->current_video_frame)) & 0x1 : 0; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const int is_comp_pred = has_second_ref(mbmi); const int this_mode = mbmi->mode; int refs[2] = { mbmi->ref_frame[0], - (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) }; - + (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) }; if (pred_filter_search) { INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE; if (xd->up_available) @@ -5053,6 +5050,12 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, int *disable_skip, int_mv (*mode_mv)[MAX_REF_FRAMES], int mi_row, int mi_col, +#if CONFIG_OBMC + uint8_t *dst_buf1[3], + int dst_stride1[3], + uint8_t *dst_buf2[3], + int dst_stride2[3], +#endif // CONFIG_OBMC #if CONFIG_EXT_INTER int_mv single_newmvs[2][MAX_REF_FRAMES], #else @@ -5088,6 +5091,24 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, #else DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * 64 * 64]); #endif // CONFIG_VP9_HIGHBITDEPTH +#if CONFIG_OBMC +#if CONFIG_VP9_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint16_t, tmp_buf1_16[MAX_MB_PLANE * 64 * 64]); + uint8_t *tmp_buf1; + uint8_t *obmc_tmp_buf[3]; +#else + DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * 64 * 64]); + uint8_t *obmc_tmp_buf[3] = {tmp_buf1, tmp_buf1 + 4096, tmp_buf1 + 8192}; +#endif // CONFIG_VP9_HIGHBITDEPTH + int obmc_tmp_stride[3] = {64, 64, 64}; + int best_obmc_flag = 0; + uint8_t tmp_skip_txfm[MAX_MB_PLANE << 2] = {0}; + int64_t tmp_bsse[MAX_MB_PLANE << 2] = {0}; + int64_t rdobmc; + int 
skip_txfm_sb_obmc = 0; + int64_t skip_sse_sb_obmc = INT64_MAX; + int allow_obmc = is_obmc_allowed(mbmi); +#endif // CONFIG_OBMC int pred_exists = 0; int intpel_mv; int64_t rd, tmp_rd, best_rd = INT64_MAX; @@ -5104,6 +5125,9 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, int64_t distortion_y = 0, distortion_uv = 0; int16_t mode_ctx = mbmi_ext->mode_context[refs[0]]; +#if CONFIG_OBMC + tmp_rd = 0; +#endif // CONFIG_OBMC #if CONFIG_REF_MV #if CONFIG_EXT_INTER if (is_comp_pred) @@ -5117,9 +5141,20 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16); +#if CONFIG_OBMC + tmp_buf1 = CONVERT_TO_BYTEPTR(tmp_buf1_16); +#endif // CONFIG_OBMC } else { tmp_buf = (uint8_t *)tmp_buf16; - } +#if CONFIG_OBMC + tmp_buf1 = (uint8_t *)tmp_buf1_16; +#endif // CONFIG_OBMC + } +#if CONFIG_OBMC + obmc_tmp_buf[0] = tmp_buf1; + obmc_tmp_buf[1] = tmp_buf1 + 4096; + obmc_tmp_buf[2] = tmp_buf1 + 8192; +#endif // CONFIG_OBMC #endif // CONFIG_VP9_HIGHBITDEPTH if (is_comp_pred) { @@ -5302,9 +5337,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, if (this_mode == NEARMV && is_comp_pred) { uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame); if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) { - int ref_mv_idx = mbmi->ref_mv_idx + 1; - cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv; - cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv; + cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][1].this_mv; + cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][1].comp_mv; for (i = 0; i < 2; ++i) { lower_mv_precision(&cur_mv[i].as_mv, cm->allow_high_precision_mv); @@ -5383,6 +5417,11 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, int64_t rs_rd; int tmp_skip_sb = 0; int64_t tmp_skip_sse = INT64_MAX; +#if CONFIG_OBMC + int obmc_flag = 0; + int tmp_skip_sb_obmc = 0; + 
int64_t tmp_skip_sse_obmc = INT64_MAX; +#endif // CONFIG_OBMC mbmi->interp_filter = i; rs = vp10_get_switchable_rate(cpi, xd); @@ -5395,10 +5434,21 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); if (cm->interp_filter == SWITCHABLE) rd += rs_rd; +#if CONFIG_OBMC + if (allow_obmc) { + obmc_flag = best_obmc_flag; + rd += RDCOST(x->rdmult, x->rddiv, + cpi->obmc_cost[bsize][obmc_flag], 0); + } +#endif // CONFIG_OBMC *mask_filter = VPXMAX(*mask_filter, rd); } else { int rate_sum = 0; int64_t dist_sum = 0; +#if CONFIG_OBMC + int rate_sum_obmc = 0; + int64_t dist_sum_obmc = 0; +#endif // CONFIG_OBMC if (i > 0 && cpi->sf.adaptive_interp_filter_search && (cpi->sf.interp_filter_search_mask & (1 << i))) { rate_sum = INT_MAX; @@ -5423,6 +5473,40 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, &tmp_skip_sb, &tmp_skip_sse); rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum); +#if CONFIG_OBMC + if (allow_obmc) { + rd += RDCOST(x->rdmult, x->rddiv, cpi->obmc_cost[bsize][0], 0); + memcpy(tmp_skip_txfm, x->skip_txfm, sizeof(tmp_skip_txfm)); + memcpy(tmp_bsse, x->bsse, sizeof(tmp_bsse)); + + vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 1, + obmc_tmp_buf, obmc_tmp_stride, + dst_buf1, dst_stride1, + dst_buf2, dst_stride2); + for (j = 0; j < MAX_MB_PLANE; ++j) { + xd->plane[j].dst.buf = obmc_tmp_buf[j]; + xd->plane[j].dst.stride = obmc_tmp_stride[j]; + } + model_rd_for_sb(cpi, bsize, x, xd, &rate_sum_obmc, &dist_sum_obmc, + &tmp_skip_sb_obmc, &tmp_skip_sse_obmc); + rdobmc = RDCOST(x->rdmult, x->rddiv, + rate_sum_obmc + cpi->obmc_cost[bsize][1], + dist_sum_obmc); + + if ((double)rdobmc <= 0.99 * (double)rd) { + obmc_flag = 1; + rd = rdobmc; + rate_sum = rate_sum_obmc; + dist_sum = dist_sum_obmc; + tmp_skip_sb = tmp_skip_sb_obmc; + tmp_skip_sse = tmp_skip_sse_obmc; + } else { + obmc_flag = 0; + memcpy(x->skip_txfm, tmp_skip_txfm, sizeof(tmp_skip_txfm)); + memcpy(x->bsse, tmp_bsse, 
sizeof(tmp_bsse)); + } + } +#endif // CONFIG_OBMC filter_cache[i] = rd; filter_cache[SWITCHABLE_FILTERS] = VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); @@ -5447,6 +5531,10 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, if (newbest) { best_rd = rd; best_filter = mbmi->interp_filter; +#if CONFIG_OBMC + if (allow_obmc) + best_obmc_flag = obmc_flag; +#endif // CONFIG_OBMC if (cm->interp_filter == SWITCHABLE && i && !(intpel_mv && IsInterpolatingFilter(i))) best_needs_copy = !best_needs_copy; @@ -5471,8 +5559,18 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, mbmi->interp_filter = cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter; rs = cm->interp_filter == SWITCHABLE ? vp10_get_switchable_rate(cpi, xd) : 0; +#if CONFIG_OBMC + if (allow_obmc) + mbmi->obmc = best_obmc_flag; + else + mbmi->obmc = 0; +#endif // CONFIG_OBMC +#if CONFIG_OBMC + if (pred_exists && !mbmi->obmc) { +#else if (pred_exists) { +#endif // CONFIG_OBMC if (best_needs_copy) { // again temporarily set the buffers to local memory to prevent a memcpy for (i = 0; i < MAX_MB_PLANE; i++) { @@ -5481,16 +5579,77 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, } } rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0); +#if CONFIG_OBMC + if (allow_obmc) + rd += RDCOST(x->rdmult, x->rddiv, + cpi->obmc_cost[bsize][mbmi->obmc], 0); +#endif // CONFIG_OBMC } else { int tmp_rate; int64_t tmp_dist; +#if CONFIG_OBMC + int tmp_rate_obmc; + int64_t tmp_dist_obmc; +#endif // CONFIG_OBMC // Handles the special case when a filter that is not in the // switchable list (ex. bilinear) is indicated at the frame level, or // skip condition holds. 
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); +#if CONFIG_OBMC + if (mbmi->obmc) { + vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 1, + obmc_tmp_buf, obmc_tmp_stride, + dst_buf1, dst_stride1, + dst_buf2, dst_stride2); + for (i = 0; i < MAX_MB_PLANE; ++i) { + xd->plane[i].dst.buf = obmc_tmp_buf[i]; + xd->plane[i].dst.stride = obmc_tmp_stride[i]; + } + model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist, + &skip_txfm_sb, &skip_sse_sb); + rd = RDCOST(x->rdmult, x->rddiv, + rs + tmp_rate + cpi->obmc_cost[bsize][1], + tmp_dist); + } else { +#endif // CONFIG_OBMC model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist, &skip_txfm_sb, &skip_sse_sb); rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist); +#if CONFIG_OBMC + if (allow_obmc) { + rd += RDCOST(x->rdmult, x->rddiv, cpi->obmc_cost[bsize][0], 0); + memcpy(tmp_skip_txfm, x->skip_txfm, sizeof(tmp_skip_txfm)); + memcpy(tmp_bsse, x->bsse, sizeof(tmp_bsse)); + + vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 1, + obmc_tmp_buf, obmc_tmp_stride, + dst_buf1, dst_stride1, + dst_buf2, dst_stride2); + for (i = 0; i < MAX_MB_PLANE; ++i) { + xd->plane[i].dst.buf = obmc_tmp_buf[i]; + xd->plane[i].dst.stride = obmc_tmp_stride[i]; + } + model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate_obmc, &tmp_dist_obmc, + &skip_txfm_sb_obmc, &skip_sse_sb_obmc); + rdobmc = RDCOST(x->rdmult, x->rddiv, + rs + tmp_rate_obmc + cpi->obmc_cost[bsize][1], + tmp_dist_obmc); + if ((double)rdobmc <= 0.99 * (double)rd) { + mbmi->obmc = 1; + rd = rdobmc; + skip_txfm_sb = skip_txfm_sb_obmc; + skip_sse_sb = skip_sse_sb_obmc; + } else { + mbmi->obmc = 0; + memcpy(x->skip_txfm, tmp_skip_txfm, sizeof(tmp_skip_txfm)); + memcpy(x->bsse, tmp_bsse, sizeof(tmp_bsse)); + restore_dst_buf(xd, orig_dst, orig_dst_stride); + } + } else { + mbmi->obmc = 0; + } + } +#endif // CONFIG_OBMC memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm)); memcpy(bsse, x->bsse, sizeof(bsse)); } @@ -5570,6 +5729,10 @@ static int64_t 
handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, if (cm->interp_filter == SWITCHABLE) *rate2 += rs; +#if CONFIG_OBMC + if (allow_obmc) + *rate2 += cpi->obmc_cost[bsize][mbmi->obmc]; +#endif // CONFIG_OBMC memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm)); memcpy(x->bsse, bsse, sizeof(bsse)); @@ -5916,6 +6079,39 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS]; const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs); +#if CONFIG_OBMC +#if CONFIG_VP9_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * 64 * 64]); + DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * 64 * 64]); +#else + DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * 64 * 64]); + DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * 64 * 64]); +#endif // CONFIG_VP9_HIGHBITDEPTH + uint8_t *dst_buf1[3], *dst_buf2[3]; + int dst_stride1[3] = {64, 64, 64}; + int dst_stride2[3] = {64, 64, 64}; + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + int len = sizeof(uint16_t); + dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); + dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + 4096 * len); + dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 8192 * len); + dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); + dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + 4096 * len); + dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 8192 * len); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + dst_buf1[0] = tmp_buf1; + dst_buf1[1] = tmp_buf1 + 4096; + dst_buf1[2] = tmp_buf1 + 8192; + dst_buf2[0] = tmp_buf2; + dst_buf2[1] = tmp_buf2 + 4096; + dst_buf2[2] = tmp_buf2 + 8192; +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH +#endif // CONFIG_OBMC vp10_zero(best_mbmode); @@ -5988,6 +6184,14 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, } #endif +#if CONFIG_OBMC + vp10_build_prediction_by_above_preds(cpi, xd, mi_row, mi_col, dst_buf1, + dst_stride1); + 
vp10_build_prediction_by_left_preds(cpi, xd, mi_row, mi_col, dst_buf2, + dst_stride2); + vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); +#endif // CONFIG_OBMC + for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { if (!(cpi->ref_frame_flags & flag_list[ref_frame])) { // Skip checking missing references in both single and compound reference @@ -6286,6 +6490,9 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP : cm->interp_filter; mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0; +#if CONFIG_OBMC + mbmi->obmc = 0; +#endif // CONFIG_OBMC x->skip = 0; set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); @@ -6450,6 +6657,10 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, &rate_y, &rate_uv, &disable_skip, frame_mv, mi_row, mi_col, +#if CONFIG_OBMC + dst_buf1, dst_stride1, + dst_buf2, dst_stride2, +#endif // CONFIG_OBMC #if CONFIG_EXT_INTER single_newmvs, #else @@ -6522,6 +6733,10 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, &tmp_rate_y, &tmp_rate_uv, &dummy_disable_skip, frame_mv, mi_row, mi_col, +#if CONFIG_OBMC + dst_buf1, dst_stride1, + dst_buf2, dst_stride2, +#endif // CONFIG_OBMC #if CONFIG_EXT_INTER dummy_single_newmvs, #else @@ -6671,6 +6886,10 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, } *returnrate_nocoef -= vp10_cost_bit(vp10_get_intra_inter_prob(cm, xd), mbmi->ref_frame[0] != INTRA_FRAME); +#if CONFIG_OBMC + if (is_inter_block(mbmi) && is_obmc_allowed(mbmi)) + *returnrate_nocoef -= cpi->obmc_cost[bsize][mbmi->obmc]; +#endif // CONFIG_OBMC #endif // CONFIG_SUPERTX rd_cost->dist = distortion2; rd_cost->rdcost = this_rd; @@ -7178,6 +7397,9 @@ void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi, mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0; mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0; #endif // CONFIG_EXT_INTRA +#if CONFIG_OBMC + mbmi->obmc = 0; +#endif // CONFIG_OBMC for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) 
filter_cache[i] = INT64_MAX;
@@ -7896,3 +8118,212 @@ void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi,
   store_coding_context(x, ctx, best_ref_index,
                        best_pred_diff, best_filter_diff, 0);
 }
+
+#if CONFIG_OBMC
+// Builds the prediction of the current block's upper region from each
+// inter-coded neighbor in the row above and stores it in tmp_buf[plane]
+// (row stride tmp_stride[plane]).
+//
+// Returns without writing anything when mi_row == 0. Intra-coded neighbors
+// are skipped, so their columns of tmp_buf are left untouched.
+//
+// Side effects on xd: set_ref_ptrs(), vp10_setup_pre_planes() and the
+// plane[].dst redirection are applied per neighbor and are not undone here;
+// only mb_to_left_edge is restored. The callers added by this patch re-run
+// vp10_setup_dst_planes() after both builders have been called.
+void vp10_build_prediction_by_above_preds(VP10_COMP *cpi,
+                                          MACROBLOCKD *xd,
+                                          int mi_row, int mi_col,
+                                          uint8_t *tmp_buf[MAX_MB_PLANE],
+                                          int tmp_stride[MAX_MB_PLANE]) {
+  VP10_COMMON *const cm = &cpi->common;
+  BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+  int i, j, mi_step, ref;
+
+  // Nothing lies above the first mi row.
+  if (mi_row == 0)
+    return;
+
+  // Visit the above neighbors left to right. mi_step is assigned before any
+  // `continue`, so the loop increment always uses the current neighbor's step.
+  for (i = 0; i < VPXMIN(xd->n8_w, cm->mi_cols - mi_col); i += mi_step) {
+    int mi_row_offset = -1;
+    int mi_col_offset = i;
+    int mi_x, mi_y, bw, bh;
+    MODE_INFO *above_mi = xd->mi[mi_col_offset +
+                                 mi_row_offset * xd->mi_stride];
+    MB_MODE_INFO *above_mbmi = &above_mi->mbmi;
+
+    // Step by the neighbor's width in 8x8 units, capped at xd->n8_w.
+    mi_step = VPXMIN(xd->n8_w,
+                     num_8x8_blocks_wide_lookup[above_mbmi->sb_type]);
+
+    if (!is_inter_block(above_mbmi))
+      continue;
+
+    // Point every plane's dst at the scratch buffer (offset arguments 0, i).
+    for (j = 0; j < MAX_MB_PLANE; ++j) {
+      struct macroblockd_plane *const pd = &xd->plane[j];
+      setup_pred_plane(&pd->dst,
+                       tmp_buf[j], tmp_stride[j],
+                       0, i, NULL,
+                       pd->subsampling_x, pd->subsampling_y);
+    }
+    // Use the neighbor's reference frame(s) as the prediction source.
+    set_ref_ptrs(cm, xd, above_mbmi->ref_frame[0], above_mbmi->ref_frame[1]);
+    for (ref = 0; ref < 1 + has_second_ref(above_mbmi); ++ref) {
+      YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi,
+                                                     above_mbmi->ref_frame[ref]);
+      assert(cfg != NULL);
+      vp10_setup_pre_planes(xd, ref, cfg, mi_row, mi_col + i,
+                            &xd->block_refs[ref]->sf);
+    }
+
+    // Temporarily move the left edge to the neighbor's column; it is restored
+    // after the loop. NOTE(review): "* 8" looks like 1/8-pel units; confirm.
+    xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8);
+    mi_x = (mi_col + i) << MI_SIZE_LOG2;
+    mi_y = mi_row << MI_SIZE_LOG2;
+
+    for (j = 0; j < MAX_MB_PLANE; ++j) {
+      const struct macroblockd_plane *pd = &xd->plane[j];
+      // Region to predict: the neighbor's width by half of this block's height
+      // (num_4x4 * 2 pixels), never less than 4 rows.
+      bw = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_x;
+      bh = VPXMAX((num_4x4_blocks_high_lookup[bsize] * 2) >> pd->subsampling_y,
+                  4);
+
+      if (above_mbmi->sb_type < BLOCK_8X8) {
+        // Sub-8x8 neighbor: predict per sub-block column.
+        const PARTITION_TYPE bp = BLOCK_8X8 - above_mbmi->sb_type;
+        const int have_vsplit = bp != PARTITION_HORZ;
+        const int have_hsplit = bp != PARTITION_VERT;
+        const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
+        const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
+        const int pw = 8 >> (have_vsplit | pd->subsampling_x);
+        int x, y;
+
+        for (y = 0; y < num_4x4_h; ++y)
+          for (x = 0; x < num_4x4_w; ++x) {
+            // Skip the top sub-block row of a horizontally split neighbor when
+            // the plane is not vertically subsampled.
+            if ((bp == PARTITION_HORZ || bp == PARTITION_SPLIT)
+                && y == 0 && !pd->subsampling_y)
+              continue;
+
+            build_inter_predictors(xd, j, mi_col_offset, mi_row_offset,
+                                   y * 2 + x, bw, bh,
+                                   4 * x, 0, pw, bh, mi_x, mi_y);
+          }
+      } else {
+        build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, 0,
+                               bw, bh, 0, 0, bw, bh, mi_x, mi_y);
+      }
+    }
+  }
+  xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
+}
+
+// Counterpart of vp10_build_prediction_by_above_preds() for the column to the
+// left: builds the prediction of the current block's left region from each
+// inter-coded left neighbor and stores it in tmp_buf[plane].
+//
+// Returns without writing anything when mi_col == 0 or when mi_col - 1 falls
+// outside [tile->mi_col_start, tile->mi_col_end). Intra-coded neighbors are
+// skipped.
+//
+// Side effects on xd mirror the above-row builder, except that the field
+// overridden and restored here is mb_to_top_edge.
+void vp10_build_prediction_by_left_preds(VP10_COMP *cpi,
+                                         MACROBLOCKD *xd,
+                                         int mi_row, int mi_col,
+                                         uint8_t *tmp_buf[MAX_MB_PLANE],
+                                         int tmp_stride[MAX_MB_PLANE]) {
+  VP10_COMMON *const cm = &cpi->common;
+  const TileInfo *const tile = &xd->tile;
+  BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+  int i, j, mi_step, ref;
+
+  // No usable left neighbor: first column, or mi_col - 1 is outside the tile.
+  if (mi_col == 0 || (mi_col - 1 < tile->mi_col_start) ||
+      (mi_col - 1) >= tile->mi_col_end)
+    return;
+
+  // Visit the left neighbors top to bottom, one neighboring block at a time.
+  for (i = 0; i < VPXMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) {
+    int mi_row_offset = i;
+    int mi_col_offset = -1;
+    int mi_x, mi_y, bw, bh;
+    MODE_INFO *left_mi = xd->mi[mi_col_offset +
+                                mi_row_offset * xd->mi_stride];
+    MB_MODE_INFO *left_mbmi = &left_mi->mbmi;
+
+    // Step by the neighbor's height in 8x8 units, capped at xd->n8_h.
+    mi_step = VPXMIN(xd->n8_h,
+                     num_8x8_blocks_high_lookup[left_mbmi->sb_type]);
+
+    if (!is_inter_block(left_mbmi))
+      continue;
+
+    // Point every plane's dst at the scratch buffer (offset arguments i, 0).
+    for (j = 0; j < MAX_MB_PLANE; ++j) {
+      struct macroblockd_plane *const pd = &xd->plane[j];
+      setup_pred_plane(&pd->dst,
+                       tmp_buf[j], tmp_stride[j],
+                       i, 0, NULL,
+                       pd->subsampling_x, pd->subsampling_y);
+    }
+    // Use the neighbor's reference frame(s) as the prediction source.
+    set_ref_ptrs(cm, xd, left_mbmi->ref_frame[0], left_mbmi->ref_frame[1]);
+    for (ref = 0; ref < 1 + has_second_ref(left_mbmi); ++ref) {
+      YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi,
+                                                     left_mbmi->ref_frame[ref]);
+      assert(cfg != NULL);
+      vp10_setup_pre_planes(xd, ref, cfg, mi_row + i, mi_col,
+                            &xd->block_refs[ref]->sf);
+    }
+
+    // Temporarily move the top edge to the neighbor's row (restored below).
+    xd->mb_to_top_edge = -(((mi_row + i) * MI_SIZE) * 8);
+    mi_x = mi_col << MI_SIZE_LOG2;
+    mi_y = (mi_row + i) << MI_SIZE_LOG2;
+
+    for (j = 0; j < MAX_MB_PLANE; ++j) {
+      const struct macroblockd_plane *pd = &xd->plane[j];
+      // Region to predict: half of this block's width (num_4x4 * 2 pixels,
+      // never less than 4 columns) by the neighbor's height.
+      bw = VPXMAX((num_4x4_blocks_wide_lookup[bsize] * 2) >> pd->subsampling_x,
+                  4);
+      bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
+
+      if (left_mbmi->sb_type < BLOCK_8X8) {
+        // Sub-8x8 neighbor: predict per sub-block row.
+        const PARTITION_TYPE bp = BLOCK_8X8 - left_mbmi->sb_type;
+        const int have_vsplit = bp != PARTITION_HORZ;
+        const int have_hsplit = bp != PARTITION_VERT;
+        const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
+        const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
+        const int ph = 8 >> (have_hsplit | pd->subsampling_y);
+        int x, y;
+
+        for (y = 0; y < num_4x4_h; ++y)
+          for (x = 0; x < num_4x4_w; ++x) {
+            // Skip the left sub-block column of a vertically split neighbor
+            // when the plane is not horizontally subsampled.
+            if ((bp == PARTITION_VERT || bp == PARTITION_SPLIT)
+                && x == 0 && !pd->subsampling_x)
+              continue;
+
+            build_inter_predictors(xd, j, mi_col_offset, mi_row_offset,
+                                   y * 2 + x, bw, bh,
+                                   0, 4 * y, bw, ph, mi_x, mi_y);
+          }
+      } else {
+        build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, 0,
+                               bw, bh, 0, 0, bw, bh, mi_x, mi_y);
+      }
+    }
+  }
+  xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
+}
+#endif // CONFIG_OBMC
diff --git a/vp10/encoder/rdopt.h b/vp10/encoder/rdopt.h
index 7c7e9ebb3..888ca4b61 100644
--- a/vp10/encoder/rdopt.h
+++ b/vp10/encoder/rdopt.h
@@ -89,6 +89,18 @@ void vp10_txfm_rd_in_plane_supertx(MACROBLOCK *x,
                                    int use_fast_coef_casting);
 #endif // CONFIG_SUPERTX
 
+#if CONFIG_OBMC
+void vp10_build_prediction_by_above_preds(VP10_COMP *cpi,
+                                          MACROBLOCKD *xd,
+                                          int mi_row, int mi_col,
+                                          uint8_t *tmp_buf[MAX_MB_PLANE],
+                                          int tmp_stride[MAX_MB_PLANE]);
+void vp10_build_prediction_by_left_preds(VP10_COMP *cpi,
+                                         MACROBLOCKD *xd,
+                                         int mi_row, int mi_col,
+                                         uint8_t *tmp_buf[MAX_MB_PLANE],
+                                         int tmp_stride[MAX_MB_PLANE]);
+#endif // CONFIG_OBMC #ifdef __cplusplus } // extern "C" #endif -- 2.40.0