From: Geza Lore Date: Mon, 4 Jul 2016 10:47:19 +0000 (+0100) Subject: Refactoring in preparation for OBMC optimizations. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=007aa7dd6519d708ffe86089b2b44960566fbe4d;p=libvpx Refactoring in preparation for OBMC optimizations. - Use int32_t instead of int in vpx_obmc_{variance,sad} functions - Remove weighted_src and obmc mask strides and assume contiguous buffers. These inputs can always be packed as contiguous arrays. Change-Id: I74c09b3fb3337f13d39e13a9cb61e140536f345d --- diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c index 4e486a9ef..d2757bf30 100644 --- a/vp10/encoder/encoder.c +++ b/vp10/encoder/encoder.c @@ -1228,19 +1228,16 @@ MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad4x4) #define MAKE_OBFP_SAD_WRAPPER(fnname) \ static unsigned int fnname##_bits8(const uint8_t *ref, int ref_stride, \ - const int *wsrc, int wsrc_stride, \ - const int *msk, int msk_stride) { \ - return fnname(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride); \ + const int32_t *wsrc, const int32_t *msk) { \ + return fnname(ref, ref_stride, wsrc, msk); \ } \ static unsigned int fnname##_bits10(const uint8_t *ref, int ref_stride, \ - const int *wsrc, int wsrc_stride, \ - const int *msk, int msk_stride) { \ - return fnname(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride) >> 2; \ + const int32_t *wsrc, const int32_t *msk) { \ + return fnname(ref, ref_stride, wsrc, msk) >> 2; \ } \ static unsigned int fnname##_bits12(const uint8_t *ref, int ref_stride, \ - const int *wsrc, int wsrc_stride, \ - const int *msk, int msk_stride) { \ - return fnname(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride) >> 4; \ + const int32_t *wsrc, const int32_t *msk) { \ + return fnname(ref, ref_stride, wsrc, msk) >> 4; \ } #if CONFIG_EXT_PARTITION diff --git a/vp10/encoder/mcomp.c b/vp10/encoder/mcomp.c index 510e5c734..553e08ecb 100644 --- a/vp10/encoder/mcomp.c +++ b/vp10/encoder/mcomp.c @@ -3109,8 +3109,7 @@ int 
vp10_masked_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x, #if CONFIG_OBMC /* returns subpixel variance error function */ #define DIST(r, c) \ - vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ - src_stride, mask, mask_stride, &sse) + vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, mask, &sse) /* checks if (r, c) has better score than previous best */ #define MVC(r, c) \ @@ -3140,8 +3139,8 @@ int vp10_masked_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x, #define CHECK_BETTER1(v, r, c) \ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ thismse = upsampled_obmc_pref_error(xd, \ - mask, mask_stride, \ - vfp, z, src_stride, \ + mask, \ + vfp, z, \ upre(y, y_stride, r, c), \ y_stride, \ w, h, &sse); \ @@ -3156,14 +3155,12 @@ int vp10_masked_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x, v = INT_MAX; \ } -static unsigned int setup_obmc_center_error(const int *mask, - int mask_stride, +static unsigned int setup_obmc_center_error(const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit, const vp10_variance_fn_ptr_t *vfp, - const int *const wsrc, - const int wsrc_stride, + const int32_t *const wsrc, const uint8_t *const y, int y_stride, int offset, @@ -3171,18 +3168,16 @@ static unsigned int setup_obmc_center_error(const int *mask, unsigned int *sse1, int *distortion) { unsigned int besterr; - besterr = vfp->ovf(y + offset, y_stride, wsrc, wsrc_stride, - mask, mask_stride, sse1); + besterr = vfp->ovf(y + offset, y_stride, wsrc, mask, sse1); *distortion = besterr; besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); return besterr; } static int upsampled_obmc_pref_error(const MACROBLOCKD *xd, - const int *mask, int mask_stride, + const int32_t *mask, const vp10_variance_fn_ptr_t *vfp, - const int *const wsrc, - const int wsrc_stride, + const int32_t *const wsrc, const uint8_t *const y, int y_stride, int w, int h, unsigned int *sse) { unsigned int besterr; @@ -3191,8 +3186,7 @@ 
static int upsampled_obmc_pref_error(const MACROBLOCKD *xd, DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]); vpx_highbd_upsampled_pred(pred16, w, h, y, y_stride); - besterr = vfp->ovf(CONVERT_TO_BYTEPTR(pred16), w, wsrc, wsrc_stride, - mask, mask_stride, sse); + besterr = vfp->ovf(CONVERT_TO_BYTEPTR(pred16), w, wsrc, mask, sse); } else { DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]); #else @@ -3201,7 +3195,7 @@ static int upsampled_obmc_pref_error(const MACROBLOCKD *xd, #endif // CONFIG_VP9_HIGHBITDEPTH vpx_upsampled_pred(pred, w, h, y, y_stride); - besterr = vfp->ovf(pred, w, wsrc, wsrc_stride, mask, mask_stride, sse); + besterr = vfp->ovf(pred, w, wsrc, mask, sse); #if CONFIG_VP9_HIGHBITDEPTH } #endif @@ -3210,15 +3204,14 @@ static int upsampled_obmc_pref_error(const MACROBLOCKD *xd, static unsigned int upsampled_setup_obmc_center_error( const MACROBLOCKD *xd, - const int *mask, int mask_stride, + const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit, const vp10_variance_fn_ptr_t *vfp, - const int *const wsrc, const int wsrc_stride, + const int32_t *const wsrc, const uint8_t *const y, int y_stride, int w, int h, int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1, int *distortion) { - unsigned int besterr = upsampled_obmc_pref_error(xd, mask, mask_stride, vfp, - wsrc, wsrc_stride, + unsigned int besterr = upsampled_obmc_pref_error(xd, mask, vfp, wsrc, y + offset, y_stride, w, h, sse1); *distortion = besterr; @@ -3227,8 +3220,8 @@ static unsigned int upsampled_setup_obmc_center_error( } int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x, - const int *wsrc, int wsrc_stride, - const int *mask, int mask_stride, + const int32_t *wsrc, + const int32_t *mask, int mi_row, int mi_col, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, @@ -3240,7 +3233,6 @@ int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x, int use_upsampled_ref) { const int *const z = wsrc; const int *const 
src_address = z; - const int src_stride = wsrc_stride; MACROBLOCKD *xd = &x->e_mbd; struct macroblockd_plane *const pd = &xd->plane[0]; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; @@ -3292,14 +3284,14 @@ int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x, // use_upsampled_ref can be 0 or 1 if (use_upsampled_ref) besterr = upsampled_setup_obmc_center_error( - xd, mask, mask_stride, bestmv, ref_mv, error_per_bit, - vfp, z, src_stride, y, y_stride, + xd, mask, bestmv, ref_mv, error_per_bit, + vfp, z, y, y_stride, w, h, (offset << 3), mvjcost, mvcost, sse1, distortion); else besterr = setup_obmc_center_error( - mask, mask_stride, bestmv, ref_mv, error_per_bit, - vfp, z, src_stride, y, y_stride, + mask, bestmv, ref_mv, error_per_bit, + vfp, z, y, y_stride, offset, mvjcost, mvcost, sse1, distortion); for (iter = 0; iter < round; ++iter) { @@ -3313,16 +3305,15 @@ int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x, if (use_upsampled_ref) { const uint8_t *const pre_address = y + tr * y_stride + tc; - thismse = upsampled_obmc_pref_error(xd, mask, mask_stride, - vfp, src_address, src_stride, + thismse = upsampled_obmc_pref_error(xd, mask, + vfp, src_address, pre_address, y_stride, w, h, &sse); } else { const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); thismse = vfp->osvf(pre_address, y_stride, sp(tc), sp(tr), - src_address, src_stride, - mask, mask_stride, &sse); + src_address, mask, &sse); } cost_array[idx] = thismse + @@ -3350,15 +3341,14 @@ int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x, if (use_upsampled_ref) { const uint8_t *const pre_address = y + tr * y_stride + tc; - thismse = upsampled_obmc_pref_error(xd, mask, mask_stride, - vfp, src_address, src_stride, + thismse = upsampled_obmc_pref_error(xd, mask, vfp, src_address, pre_address, y_stride, w, h, &sse); } else { const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); thismse = vfp->osvf(pre_address, y_stride, 
sp(tc), sp(tr), - src_address, src_stride, mask, mask_stride, &sse); + src_address, mask, &sse); } cost_array[4] = thismse + @@ -3422,8 +3412,8 @@ int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x, #undef CHECK_BETTER static int get_obmc_mvpred_var(const MACROBLOCK *x, - const int *wsrc, int wsrc_stride, - const int *mask, int mask_stride, + const int32_t *wsrc, + const int32_t *mask, const MV *best_mv, const MV *center_mv, const vp10_variance_fn_ptr_t *vfp, int use_mvcost, int is_second) { @@ -3433,14 +3423,14 @@ static int get_obmc_mvpred_var(const MACROBLOCK *x, unsigned int unused; return vfp->ovf(get_buf_from_mv(in_what, best_mv), in_what->stride, - wsrc, wsrc_stride, mask, mask_stride, &unused) + + wsrc, mask, &unused) + (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, x->errorperbit) : 0); } int obmc_refining_search_sad(const MACROBLOCK *x, - const int *wsrc, int wsrc_stride, - const int *mask, int mask_stride, + const int32_t *wsrc, + const int32_t *mask, MV *ref_mv, int error_per_bit, int search_range, const vp10_variance_fn_ptr_t *fn_ptr, @@ -3450,8 +3440,7 @@ int obmc_refining_search_sad(const MACROBLOCK *x, const struct buf_2d *const in_what = &xd->plane[0].pre[is_second]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; unsigned int best_sad = fn_ptr->osdf(get_buf_from_mv(in_what, ref_mv), - in_what->stride, - wsrc, wsrc_stride, mask, mask_stride) + + in_what->stride, wsrc, mask) + mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); int i, j; @@ -3463,8 +3452,7 @@ int obmc_refining_search_sad(const MACROBLOCK *x, ref_mv->col + neighbors[j].col}; if (is_mv_in(x, &mv)) { unsigned int sad = fn_ptr->osdf(get_buf_from_mv(in_what, &mv), - in_what->stride, wsrc, wsrc_stride, - mask, mask_stride); + in_what->stride, wsrc, mask); if (sad < best_sad) { sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); if (sad < best_sad) { @@ -3487,8 +3475,7 @@ int obmc_refining_search_sad(const MACROBLOCK 
*x, int obmc_diamond_search_sad(const MACROBLOCK *x, const search_site_config *cfg, - const int *wsrc, int wsrc_stride, - const int *mask, int mask_stride, + const int32_t *wsrc, const int32_t *mask, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, @@ -3516,8 +3503,7 @@ int obmc_diamond_search_sad(const MACROBLOCK *x, *best_mv = *ref_mv; // Check the starting position - best_sad = fn_ptr->osdf(best_address, in_what->stride, - wsrc, wsrc_stride, mask, mask_stride) + + best_sad = fn_ptr->osdf(best_address, in_what->stride, wsrc, mask) + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); i = 1; @@ -3528,7 +3514,7 @@ int obmc_diamond_search_sad(const MACROBLOCK *x, best_mv->col + ss[i].mv.col}; if (is_mv_in(x, &mv)) { int sad = fn_ptr->osdf(best_address + ss[i].offset, in_what->stride, - wsrc, wsrc_stride, mask, mask_stride); + wsrc, mask); if (sad < best_sad) { sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); if (sad < best_sad) { @@ -3552,8 +3538,7 @@ int obmc_diamond_search_sad(const MACROBLOCK *x, best_mv->col + ss[best_site].mv.col}; if (is_mv_in(x, &this_mv)) { int sad = fn_ptr->osdf(best_address + ss[best_site].offset, - in_what->stride, wsrc, wsrc_stride, - mask, mask_stride); + in_what->stride, wsrc, mask); if (sad < best_sad) { sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (sad < best_sad) { @@ -3576,8 +3561,7 @@ int obmc_diamond_search_sad(const MACROBLOCK *x, } int vp10_obmc_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x, - const int *wsrc, int wsrc_stride, - const int *mask, int mask_stride, + const int32_t *wsrc, const int32_t *mask, MV *mvp_full, int step_param, int sadpb, int further_steps, int do_refine, const vp10_variance_fn_ptr_t *fn_ptr, @@ -3586,13 +3570,12 @@ int vp10_obmc_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x, MV temp_mv; int thissme, n, num00 = 0; int bestsme = obmc_diamond_search_sad(x, &cpi->ss_cfg, - wsrc, wsrc_stride, - mask, mask_stride, + wsrc, mask, mvp_full, 
&temp_mv, step_param, sadpb, &n, fn_ptr, ref_mv, is_second); if (bestsme < INT_MAX) - bestsme = get_obmc_mvpred_var(x, wsrc, wsrc_stride, mask, mask_stride, + bestsme = get_obmc_mvpred_var(x, wsrc, mask, &temp_mv, ref_mv, fn_ptr, 1, is_second); *dst_mv = temp_mv; @@ -3608,13 +3591,12 @@ int vp10_obmc_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x, num00--; } else { thissme = obmc_diamond_search_sad(x, &cpi->ss_cfg, - wsrc, wsrc_stride, - mask, mask_stride, + wsrc, mask, mvp_full, &temp_mv, step_param + n, sadpb, &num00, fn_ptr, ref_mv, is_second); if (thissme < INT_MAX) - thissme = get_obmc_mvpred_var(x, wsrc, wsrc_stride, mask, mask_stride, + thissme = get_obmc_mvpred_var(x, wsrc, mask, &temp_mv, ref_mv, fn_ptr, 1, is_second); // check to see if refining search is needed. @@ -3632,11 +3614,11 @@ int vp10_obmc_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x, if (do_refine) { const int search_range = 8; MV best_mv = *dst_mv; - thissme = obmc_refining_search_sad(x, wsrc, wsrc_stride, mask, mask_stride, + thissme = obmc_refining_search_sad(x, wsrc, mask, &best_mv, sadpb, search_range, fn_ptr, ref_mv, is_second); if (thissme < INT_MAX) - thissme = get_obmc_mvpred_var(x, wsrc, wsrc_stride, mask, mask_stride, + thissme = get_obmc_mvpred_var(x, wsrc, mask, &best_mv, ref_mv, fn_ptr, 1, is_second); if (thissme < bestsme) { bestsme = thissme; diff --git a/vp10/encoder/mcomp.h b/vp10/encoder/mcomp.h index 1b4e6130d..704e26c77 100644 --- a/vp10/encoder/mcomp.h +++ b/vp10/encoder/mcomp.h @@ -198,16 +198,16 @@ int vp10_masked_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x, #if CONFIG_OBMC int vp10_obmc_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x, - const int *wsrc, int wsrc_stride, - const int *mask, int mask_stride, + const int32_t *wsrc, + const int32_t *mask, MV *mvp_full, int step_param, int sadpb, int further_steps, int do_refine, const vp10_variance_fn_ptr_t *fn_ptr, const MV *ref_mv, MV *dst_mv, int is_second); int 
vp10_find_best_obmc_sub_pixel_tree_up(struct VP10_COMP *cpi, MACROBLOCK *x, - const int *wsrc, int wsrc_stride, - const int *mask, int mask_stride, + const int32_t *wsrc, + const int32_t *mask, int mi_row, int mi_col, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index 25f67abd6..7cc65e65e 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -6073,8 +6073,7 @@ static INLINE void restore_dst_buf(MACROBLOCKD *xd, #if CONFIG_OBMC static void single_motion_search_obmc(VP10_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, - const int* wsrc, int wsrc_stride, - const int* mask, int mask_stride, + const int32_t* wsrc, const int32_t* mask, #if CONFIG_EXT_INTER int ref_idx, int mv_idx, @@ -6173,8 +6172,7 @@ static void single_motion_search_obmc(VP10_COMP *cpi, MACROBLOCK *x, mvp_full.col >>= 3; mvp_full.row >>= 3; - bestsme = vp10_obmc_full_pixel_diamond(cpi, x, wsrc, wsrc_stride, - mask, mask_stride, + bestsme = vp10_obmc_full_pixel_diamond(cpi, x, wsrc, mask, &mvp_full, step_param, sadpb, MAX_MVSEARCH_STEPS - 1 - step_param, 1, &cpi->fn_ptr[bsize], @@ -6188,8 +6186,7 @@ static void single_motion_search_obmc(VP10_COMP *cpi, MACROBLOCK *x, if (bestsme < INT_MAX) { int dis; vp10_find_best_obmc_sub_pixel_tree_up(cpi, x, - wsrc, wsrc_stride, - mask, mask_stride, + wsrc, mask, mi_row, mi_col, &tmp_mv->as_mv, &ref_mv, cm->allow_high_precision_mv, @@ -6796,8 +6793,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, #if CONFIG_OBMC uint8_t *dst_buf1[3], int dst_stride1[3], uint8_t *dst_buf2[3], int dst_stride2[3], - int *wsrc, int wsrc_strides, - int *mask2d, int mask2d_strides, + const int32_t *const wsrc, + const int32_t *const mask2d, #endif // CONFIG_OBMC #if CONFIG_EXT_INTER int_mv single_newmvs[2][MAX_REF_FRAMES], @@ -7739,8 +7736,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, pred_mv.as_int = mbmi->mv[0].as_int; 
single_motion_search_obmc(cpi, x, bsize, mi_row, mi_col, - wsrc, wsrc_strides, - mask2d, mask2d_strides, + wsrc, mask2d, #if CONFIG_EXT_INTER 0, mv_idx, #endif // CONFIG_EXT_INTER @@ -8494,13 +8490,11 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]); DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]); #endif // CONFIG_VP9_HIGHBITDEPTH - DECLARE_ALIGNED(16, int, weighted_src_buf[MAX_SB_SQUARE]); - DECLARE_ALIGNED(16, int, mask2d_buf[MAX_SB_SQUARE]); + DECLARE_ALIGNED(16, int32_t, weighted_src_buf[MAX_SB_SQUARE]); + DECLARE_ALIGNED(16, int32_t, mask2d_buf[MAX_SB_SQUARE]); uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE]; int dst_stride1[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE}; int dst_stride2[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE}; - int weighted_src_stride = MAX_SB_SIZE; - int mask2d_stride = MAX_SB_SIZE; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -8605,8 +8599,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, calc_target_weighted_pred(cm, x, xd, mi_row, mi_col, dst_buf1[0], dst_stride1[0], dst_buf2[0], dst_stride2[0], - mask2d_buf, mask2d_stride, - weighted_src_buf, weighted_src_stride); + mask2d_buf, weighted_src_buf); #endif // CONFIG_OBMC for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { @@ -9143,8 +9136,8 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, #if CONFIG_OBMC dst_buf1, dst_stride1, dst_buf2, dst_stride2, - weighted_src_buf, weighted_src_stride, - mask2d_buf, mask2d_stride, + weighted_src_buf, + mask2d_buf, #endif // CONFIG_OBMC #if CONFIG_EXT_INTER single_newmvs, @@ -9258,8 +9251,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, dst_buf1, dst_stride1, dst_buf2, dst_stride2, weighted_src_buf, - weighted_src_stride, - mask2d_buf, mask2d_stride, + mask2d_buf, #endif // CONFIG_OBMC #if CONFIG_EXT_INTER dummy_single_newmvs, @@ -10970,14 +10962,16 @@ void 
calc_target_weighted_pred(VP10_COMMON *cm, int mi_row, int mi_col, uint8_t *above_buf, int above_stride, uint8_t *left_buf, int left_stride, - int *mask_buf, int mask_stride, - int *weighted_src_buf, int weighted_src_stride) { + int32_t *mask_buf, + int32_t *weighted_src_buf) { BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; int row, col, i, mi_step; int bw = 8 * xd->n8_w; int bh = 8 * xd->n8_h; - int *dst = weighted_src_buf; - int *mask2d = mask_buf; + const int mask_stride = bw; + const int weighted_src_stride = bw; + int32_t *dst = weighted_src_buf; + int32_t *mask2d = mask_buf; uint8_t *src; #if CONFIG_VP9_HIGHBITDEPTH int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0; @@ -11009,11 +11003,11 @@ void calc_target_weighted_pred(VP10_COMMON *cm, int bw = (mi_step * MI_SIZE) >> pd->subsampling_x; int bh = overlap >> pd->subsampling_y; int dst_stride = weighted_src_stride; - int *dst = weighted_src_buf + (i * MI_SIZE >> pd->subsampling_x); + int32_t *dst = weighted_src_buf + (i * MI_SIZE >> pd->subsampling_x); int tmp_stride = above_stride; uint8_t *tmp = above_buf + (i * MI_SIZE >> pd->subsampling_x); int mask2d_stride = mask_stride; - int *mask2d = mask_buf + (i * MI_SIZE >> pd->subsampling_x); + int32_t *mask2d = mask_buf + (i * MI_SIZE >> pd->subsampling_x); const uint8_t *mask1d[2]; setup_obmc_mask(bh, mask1d); @@ -11078,14 +11072,14 @@ void calc_target_weighted_pred(VP10_COMMON *cm, int bw = overlap >> pd->subsampling_x; int bh = (mi_step * MI_SIZE) >> pd->subsampling_y; int dst_stride = weighted_src_stride; - int *dst = weighted_src_buf + + int32_t *dst = weighted_src_buf + (i * MI_SIZE * dst_stride >> pd->subsampling_y); int tmp_stride = left_stride; uint8_t *tmp = left_buf + (i * MI_SIZE * tmp_stride >> pd->subsampling_y); int mask2d_stride = mask_stride; - int *mask2d = mask_buf + - (i * MI_SIZE * mask2d_stride >> pd->subsampling_y); + int32_t *mask2d = mask_buf + + (i * MI_SIZE * mask2d_stride >> pd->subsampling_y); const uint8_t *mask1d[2]; 
setup_obmc_mask(bw, mask1d); diff --git a/vp10/encoder/rdopt.h b/vp10/encoder/rdopt.h index be6227b09..b660e2376 100644 --- a/vp10/encoder/rdopt.h +++ b/vp10/encoder/rdopt.h @@ -97,8 +97,7 @@ void calc_target_weighted_pred(VP10_COMMON *cm, int mi_row, int mi_col, uint8_t *above_buf, int above_stride, uint8_t *left_buf, int left_stride, - int *mask_buf, int mask_stride, - int *weighted_src_buf, int weighted_src_stride); + int32_t *mask_buf, int32_t *weighted_src_buf); #endif // CONFIG_OBMC #ifdef __cplusplus diff --git a/vpx_dsp/sad.c b/vpx_dsp/sad.c index b3ed41050..bb1daf823 100644 --- a/vpx_dsp/sad.c +++ b/vpx_dsp/sad.c @@ -456,21 +456,19 @@ HIGHBD_MASKSADMXN(4, 4) // b: target weighted prediction (has been *4096 to keep precision) // m: 2d weights (scaled by 4096) static INLINE unsigned int obmc_sad(const uint8_t *a, int a_stride, - const int *b, int b_stride, - const int *m, int m_stride, + const int32_t *b, + const int32_t *m, int width, int height) { int y, x; unsigned int sad = 0; for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) { - int abs_diff = abs(b[x] - a[x] * m[x]); - sad += (abs_diff + 2048) >> 12; - } + for (x = 0; x < width; x++) + sad += ROUND_POWER_OF_TWO(abs(b[x] - a[x] * m[x]), 12); a += a_stride; - b += b_stride; - m += m_stride; + b += width; + m += width; } return sad; @@ -478,9 +476,9 @@ static INLINE unsigned int obmc_sad(const uint8_t *a, int a_stride, #define OBMCSADMxN(m, n) \ unsigned int vpx_obmc_sad##m##x##n##_c(const uint8_t *ref, int ref_stride, \ - const int *wsrc, int wsrc_stride, \ - const int *msk, int msk_stride) { \ - return obmc_sad(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride, m, n); \ + const int32_t *wsrc, \ + const int32_t *msk) { \ + return obmc_sad(ref, ref_stride, wsrc, msk, m, n); \ } #if CONFIG_EXT_PARTITION @@ -504,22 +502,20 @@ OBMCSADMxN(4, 4) #if CONFIG_VP9_HIGHBITDEPTH static INLINE unsigned int highbd_obmc_sad(const uint8_t *a8, int a_stride, - const int *b, int b_stride, - const int *m, int 
m_stride, + const int32_t *b, + const int32_t *m, int width, int height) { int y, x; unsigned int sad = 0; const uint16_t *a = CONVERT_TO_SHORTPTR(a8); for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) { - int abs_diff = abs(b[x] - a[x] * m[x]); - sad += (abs_diff + 2048) >> 12; - } + for (x = 0; x < width; x++) + sad += ROUND_POWER_OF_TWO(abs(b[x] - a[x] * m[x]), 12); a += a_stride; - b += b_stride; - m += m_stride; + b += width; + m += width; } return sad; @@ -528,12 +524,9 @@ static INLINE unsigned int highbd_obmc_sad(const uint8_t *a8, int a_stride, #define HIGHBD_OBMCSADMXN(m, n) \ unsigned int vpx_highbd_obmc_sad##m##x##n##_c(const uint8_t *ref, \ int ref_stride, \ - const int *wsrc, \ - int wsrc_stride, \ - const int *msk, \ - int msk_stride) { \ - return highbd_obmc_sad(ref, ref_stride, wsrc, wsrc_stride, \ - msk, msk_stride, m, n); \ + const int32_t *wsrc, \ + const int32_t *msk) { \ + return highbd_obmc_sad(ref, ref_stride, wsrc, msk, m, n); \ } #if CONFIG_EXT_PARTITION diff --git a/vpx_dsp/variance.c b/vpx_dsp/variance.c index ab3d8bbc1..b9f0e32fd 100644 --- a/vpx_dsp/variance.c +++ b/vpx_dsp/variance.c @@ -1026,8 +1026,8 @@ HIGHBD_MASK_SUBPIX_VAR(128, 128) #if CONFIG_VP10 && CONFIG_OBMC void obmc_variance(const uint8_t *a, int a_stride, - const int *b, int b_stride, - const int *m, int m_stride, + const int32_t *b, + const int32_t *m, int w, int h, unsigned int *sse, int *sum) { int i, j; @@ -1036,26 +1036,24 @@ void obmc_variance(const uint8_t *a, int a_stride, for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { - int scaled_diff = b[j] - a[j] * m[j]; - int abs_diff = (abs(scaled_diff) + 2048) >> 12; - int diff = (scaled_diff >= 0) ? 
abs_diff : -abs_diff; + int diff = ROUND_POWER_OF_TWO_SIGNED(b[j] - a[j] * m[j], 12); *sum += diff; *sse += diff * diff; } a += a_stride; - b += b_stride; - m += m_stride; + b += w; + m += w; } } #define OBMC_VAR(W, H) \ unsigned int vpx_obmc_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ - const int *b, int b_stride, \ - const int *m, int m_stride, \ + const int32_t *b, \ + const int32_t *m, \ unsigned int *sse) { \ int sum; \ - obmc_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, &sum); \ + obmc_variance(a, a_stride, b, m, W, H, sse, &sum); \ return *sse - (((int64_t)sum * sum) / (W * H)); \ } @@ -1063,8 +1061,8 @@ unsigned int vpx_obmc_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ unsigned int vpx_obmc_sub_pixel_variance##W##x##H##_c( \ const uint8_t *pre, int pre_stride, \ int xoffset, int yoffset, \ - const int *wsrc, int wsrc_stride, \ - const int *msk, int msk_stride, \ + const int32_t *wsrc, \ + const int32_t *msk, \ unsigned int *sse) { \ uint16_t fdata3[(H + 1) * W]; \ uint8_t temp2[H * W]; \ @@ -1074,8 +1072,7 @@ unsigned int vpx_obmc_sub_pixel_variance##W##x##H##_c( \ var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ bilinear_filters_2t[yoffset]); \ \ - return vpx_obmc_variance##W##x##H##_c(temp2, W, wsrc, wsrc_stride, \ - msk, msk_stride, sse); \ + return vpx_obmc_variance##W##x##H##_c(temp2, W, wsrc, msk, sse); \ } OBMC_VAR(4, 4) @@ -1130,8 +1127,7 @@ OBMC_SUBPIX_VAR(128, 128) #if CONFIG_VP9_HIGHBITDEPTH void highbd_obmc_variance64(const uint8_t *a8, int a_stride, - const int *b, int b_stride, - const int *m, int m_stride, + const int32_t *b, const int32_t *m, int w, int h, uint64_t *sse, int64_t *sum) { int i, j; uint16_t *a = CONVERT_TO_SHORTPTR(a8); @@ -1141,51 +1137,43 @@ void highbd_obmc_variance64(const uint8_t *a8, int a_stride, for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { - int scaled_diff = b[j] - a[j] * m[j]; - int abs_diff = (abs(scaled_diff) + 2048) >> 12; - int diff = (scaled_diff >= 0) ? 
abs_diff : -abs_diff; + int diff = ROUND_POWER_OF_TWO_SIGNED(b[j] - a[j] * m[j], 12); *sum += diff; *sse += diff * diff; } a += a_stride; - b += b_stride; - m += m_stride; + b += w; + m += w; } } void highbd_obmc_variance(const uint8_t *a8, int a_stride, - const int *b, int b_stride, - const int *m, int m_stride, - int w, int h, unsigned int *sse, int *sum) { + const int32_t *b, const int32_t *m, + int w, int h, unsigned int *sse, int *sum) { int64_t sum64; uint64_t sse64; - highbd_obmc_variance64(a8, a_stride, b, b_stride, m, m_stride, - w, h, &sse64, &sum64); + highbd_obmc_variance64(a8, a_stride, b, m, w, h, &sse64, &sum64); *sum = (int)sum64; *sse = (unsigned int)sse64; } void highbd_10_obmc_variance(const uint8_t *a8, int a_stride, - const int *b, int b_stride, - const int *m, int m_stride, - int w, int h, unsigned int *sse, int *sum) { + const int32_t *b, const int32_t *m, + int w, int h, unsigned int *sse, int *sum) { int64_t sum64; uint64_t sse64; - highbd_obmc_variance64(a8, a_stride, b, b_stride, m, m_stride, - w, h, &sse64, &sum64); + highbd_obmc_variance64(a8, a_stride, b, m, w, h, &sse64, &sum64); *sum = (int)ROUND_POWER_OF_TWO(sum64, 2); *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 4); } void highbd_12_obmc_variance(const uint8_t *a8, int a_stride, - const int *b, int b_stride, - const int *m, int m_stride, - int w, int h, unsigned int *sse, int *sum) { + const int32_t *b, const int32_t *m, + int w, int h, unsigned int *sse, int *sum) { int64_t sum64; uint64_t sse64; - highbd_obmc_variance64(a8, a_stride, b, b_stride, m, m_stride, - w, h, &sse64, &sum64); + highbd_obmc_variance64(a8, a_stride, b, m, w, h, &sse64, &sum64); *sum = (int)ROUND_POWER_OF_TWO(sum64, 4); *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8); } @@ -1193,40 +1181,31 @@ void highbd_12_obmc_variance(const uint8_t *a8, int a_stride, #define HIGHBD_OBMC_VAR(W, H) \ unsigned int vpx_highbd_obmc_variance##W##x##H##_c(const uint8_t *a, \ int a_stride, \ - const int *b, \ - int b_stride, 
\ - const int *m, \ - int m_stride, \ + const int32_t *b, \ + const int32_t *m, \ unsigned int *sse) { \ int sum; \ - highbd_obmc_variance(a, a_stride, b, b_stride, m, m_stride, \ - W, H, sse, &sum); \ + highbd_obmc_variance(a, a_stride, b, m, W, H, sse, &sum); \ return *sse - (((int64_t)sum * sum) / (W * H)); \ } \ \ unsigned int vpx_highbd_10_obmc_variance##W##x##H##_c(const uint8_t *a, \ int a_stride, \ - const int *b, \ - int b_stride, \ - const int *m, \ - int m_stride, \ + const int32_t *b, \ + const int32_t *m, \ unsigned int *sse) { \ int sum; \ - highbd_10_obmc_variance(a, a_stride, b, b_stride, m, m_stride, \ - W, H, sse, &sum); \ + highbd_10_obmc_variance(a, a_stride, b, m, W, H, sse, &sum); \ return *sse - (((int64_t)sum * sum) / (W * H)); \ } \ \ unsigned int vpx_highbd_12_obmc_variance##W##x##H##_c(const uint8_t *a, \ int a_stride, \ - const int *b, \ - int b_stride, \ - const int *m, \ - int m_stride, \ + const int32_t *b, \ + const int32_t *m, \ unsigned int *sse) { \ int sum; \ - highbd_12_obmc_variance(a, a_stride, b, b_stride, m, m_stride, \ - W, H, sse, &sum); \ + highbd_12_obmc_variance(a, a_stride, b, m, W, H, sse, &sum); \ return *sse - (((int64_t)sum * sum) / (W * H)); \ } @@ -1234,8 +1213,8 @@ unsigned int vpx_highbd_12_obmc_variance##W##x##H##_c(const uint8_t *a, \ unsigned int vpx_highbd_obmc_sub_pixel_variance##W##x##H##_c( \ const uint8_t *pre, int pre_stride, \ int xoffset, int yoffset, \ - const int *wsrc, int wsrc_stride, \ - const int *msk, int msk_stride, \ + const int32_t *wsrc, \ + const int32_t *msk, \ unsigned int *sse) { \ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ @@ -1247,15 +1226,14 @@ unsigned int vpx_highbd_obmc_sub_pixel_variance##W##x##H##_c( \ bilinear_filters_2t[yoffset]); \ \ return vpx_highbd_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ - W, wsrc, wsrc_stride, \ - msk, msk_stride, sse); \ + W, wsrc, msk, sse); \ } \ \ unsigned int vpx_highbd_10_obmc_sub_pixel_variance##W##x##H##_c( \ const 
uint8_t *pre, int pre_stride, \ int xoffset, int yoffset, \ - const int *wsrc, int wsrc_stride, \ - const int *msk, int msk_stride, \ + const int32_t *wsrc, \ + const int32_t *msk, \ unsigned int *sse) { \ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ @@ -1267,15 +1245,14 @@ unsigned int vpx_highbd_10_obmc_sub_pixel_variance##W##x##H##_c( \ bilinear_filters_2t[yoffset]); \ \ return vpx_highbd_10_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ - W, wsrc, wsrc_stride, \ - msk, msk_stride, sse); \ + W, wsrc, msk, sse); \ } \ \ unsigned int vpx_highbd_12_obmc_sub_pixel_variance##W##x##H##_c( \ const uint8_t *pre, int pre_stride, \ int xoffset, int yoffset, \ - const int *wsrc, int wsrc_stride, \ - const int *msk, int msk_stride, \ + const int32_t *wsrc, \ + const int32_t *msk, \ unsigned int *sse) { \ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ @@ -1287,8 +1264,7 @@ unsigned int vpx_highbd_12_obmc_sub_pixel_variance##W##x##H##_c( \ bilinear_filters_2t[yoffset]); \ \ return vpx_highbd_12_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ - W, wsrc, wsrc_stride, \ - msk, msk_stride, sse); \ + W, wsrc, msk, sse); \ } HIGHBD_OBMC_VAR(4, 4) diff --git a/vpx_dsp/variance.h b/vpx_dsp/variance.h index 88ab5e3a1..aaef8c03e 100644 --- a/vpx_dsp/variance.h +++ b/vpx_dsp/variance.h @@ -101,24 +101,18 @@ typedef unsigned int (*vpx_masked_subpixvariance_fn_t)(const uint8_t *src, #if CONFIG_VP10 && CONFIG_OBMC typedef unsigned int(*vpx_obmc_sad_fn_t)(const uint8_t *pred, int pred_stride, - const int *wsrc, - int wsrc_stride, - const int *msk, - int msk_stride); + const int32_t *wsrc, + const int32_t *msk); typedef unsigned int (*vpx_obmc_variance_fn_t)(const uint8_t *pred, int pred_stride, - const int *wsrc, - int wsrc_stride, - const int *msk, - int msk_stride, + const int32_t *wsrc, + const int32_t *msk, unsigned int *sse); typedef unsigned int (*vpx_obmc_subpixvariance_fn_t)(const uint8_t *pred, int pred_stride, int xoffset, int yoffset, - 
const int *wsrc, - int wsrc_stride, - const int *msk, - int msk_stride, + const int32_t *wsrc, + const int32_t *msk, unsigned int *sse); #endif // CONFIG_VP10 && CONFIG_OBMC diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 2fb61f103..4944b2ece 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -1103,14 +1103,14 @@ if (vpx_config("CONFIG_EXT_INTER") eq "yes") { if (vpx_config("CONFIG_OBMC") eq "yes") { foreach (@block_sizes) { ($w, $h) = @$_; - add_proto qw/unsigned int/, "vpx_obmc_sad${w}x${h}", "const uint8_t *ref_ptr, int ref_stride, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride"; + add_proto qw/unsigned int/, "vpx_obmc_sad${w}x${h}", "const uint8_t *ref_ptr, int ref_stride, const int32_t *wsrc_ptr, const int32_t *mask"; specialize "vpx_obmc_sad${w}x${h}"; } if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { foreach (@block_sizes) { ($w, $h) = @$_; - add_proto qw/unsigned int/, "vpx_highbd_obmc_sad${w}x${h}", "const uint8_t *ref_ptr, int ref_stride, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride"; + add_proto qw/unsigned int/, "vpx_highbd_obmc_sad${w}x${h}", "const uint8_t *ref_ptr, int ref_stride, const int32_t *wsrc_ptr, const int32_t *mask"; specialize "vpx_highbd_obmc_sad${w}x${h}"; } } @@ -1400,8 +1400,8 @@ if (vpx_config("CONFIG_EXT_INTER") eq "yes") { if (vpx_config("CONFIG_OBMC") eq "yes") { foreach (@block_sizes) { ($w, $h) = @$_; - add_proto qw/unsigned int/, "vpx_obmc_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride, unsigned int *sse"; - add_proto qw/unsigned int/, "vpx_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, int xoffset, int yoffset, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride, unsigned int *sse"; + add_proto qw/unsigned int/, "vpx_obmc_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, 
const int32_t *wsrc_ptr, const int32_t *mask, unsigned int *sse"; + add_proto qw/unsigned int/, "vpx_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc_ptr, const int32_t *mask, unsigned int *sse"; specialize "vpx_obmc_variance${w}x${h}"; specialize "vpx_obmc_sub_pixel_variance${w}x${h}"; } @@ -1410,8 +1410,8 @@ if (vpx_config("CONFIG_OBMC") eq "yes") { foreach $bd ("_", "_10_", "_12_") { foreach (@block_sizes) { ($w, $h) = @$_; - add_proto qw/unsigned int/, "vpx_highbd${bd}obmc_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride, unsigned int *sse"; - add_proto qw/unsigned int/, "vpx_highbd${bd}obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, int xoffset, int yoffset, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride, unsigned int *sse"; + add_proto qw/unsigned int/, "vpx_highbd${bd}obmc_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, const int32_t *wsrc_ptr, const int32_t *mask, unsigned int *sse"; + add_proto qw/unsigned int/, "vpx_highbd${bd}obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc_ptr, const int32_t *mask, unsigned int *sse"; specialize "vpx_highbd${bd}obmc_variance${w}x${h}"; specialize "vpx_highbd${bd}obmc_sub_pixel_variance${w}x${h}"; }