From: Geza Lore <gezalore@gmail.com>
Date: Mon, 4 Jul 2016 10:47:19 +0000 (+0100)
Subject: Refactoring in preparation for OBMC optimizations.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=007aa7dd6519d708ffe86089b2b44960566fbe4d;p=libvpx

Refactoring in preparation for OBMC optimizations.

- Use int32_t instead of int in vpx_obmc{variance,sad} functions
- Remove weighted_src and obmc mask strides and assume contiguous
  buffers. These inputs can always be packed as contiguous arrays.

Change-Id: I74c09b3fb3337f13d39e13a9cb61e140536f345d
---

diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index 4e486a9ef..d2757bf30 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -1228,19 +1228,16 @@ MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad4x4)
 
 #define MAKE_OBFP_SAD_WRAPPER(fnname)                                      \
 static unsigned int fnname##_bits8(const uint8_t *ref, int ref_stride,     \
-                                   const int *wsrc, int wsrc_stride,       \
-                                   const int *msk, int msk_stride) {       \
-  return fnname(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride);      \
+                                   const int32_t *wsrc, const int32_t *msk) { \
+  return fnname(ref, ref_stride, wsrc, msk);                               \
 }                                                                          \
 static unsigned int fnname##_bits10(const uint8_t *ref, int ref_stride,    \
-                                    const int *wsrc, int wsrc_stride,      \
-                                    const int *msk, int msk_stride) {      \
-  return fnname(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride) >> 2; \
+                                    const int32_t *wsrc, const int32_t *msk) { \
+  return fnname(ref, ref_stride, wsrc, msk) >> 2;                          \
 }                                                                          \
 static unsigned int fnname##_bits12(const uint8_t *ref, int ref_stride,    \
-                                    const int *wsrc, int wsrc_stride,      \
-                                    const int *msk, int msk_stride) {      \
-  return fnname(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride) >> 4; \
+                                    const int32_t *wsrc, const int32_t *msk) { \
+  return fnname(ref, ref_stride, wsrc, msk) >> 4;                          \
 }
 
 #if CONFIG_EXT_PARTITION
diff --git a/vp10/encoder/mcomp.c b/vp10/encoder/mcomp.c
index 510e5c734..553e08ecb 100644
--- a/vp10/encoder/mcomp.c
+++ b/vp10/encoder/mcomp.c
@@ -3109,8 +3109,7 @@ int vp10_masked_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
 #if CONFIG_OBMC
 /* returns subpixel variance error function */
 #define DIST(r, c)                                                     \
-  vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z,         \
-            src_stride, mask, mask_stride, &sse)
+  vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, mask, &sse)
 
 /* checks if (r, c) has better score than previous best */
 #define MVC(r, c)                                                      \
@@ -3140,8 +3139,8 @@ int vp10_masked_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
 #define CHECK_BETTER1(v, r, c) \
   if (c >= minc && c <= maxc && r >= minr && r <= maxr) {              \
     thismse = upsampled_obmc_pref_error(xd,                            \
-                                        mask, mask_stride,             \
-                                        vfp, z, src_stride,            \
+                                        mask,                          \
+                                        vfp, z,                        \
                                         upre(y, y_stride, r, c),       \
                                         y_stride,                      \
                                         w, h, &sse);                   \
@@ -3156,14 +3155,12 @@ int vp10_masked_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
     v = INT_MAX;                                                       \
   }
 
-static unsigned int setup_obmc_center_error(const int *mask,
-                                            int mask_stride,
+static unsigned int setup_obmc_center_error(const int32_t *mask,
                                             const MV *bestmv,
                                             const MV *ref_mv,
                                             int error_per_bit,
                                             const vp10_variance_fn_ptr_t *vfp,
-                                            const int *const wsrc,
-                                            const int wsrc_stride,
+                                            const int32_t *const wsrc,
                                             const uint8_t *const y,
                                             int y_stride,
                                             int offset,
@@ -3171,18 +3168,16 @@ static unsigned int setup_obmc_center_error(const int *mask,
                                             unsigned int *sse1,
                                             int *distortion) {
   unsigned int besterr;
-  besterr = vfp->ovf(y + offset, y_stride, wsrc, wsrc_stride,
-                     mask, mask_stride, sse1);
+  besterr = vfp->ovf(y + offset, y_stride, wsrc, mask, sse1);
   *distortion = besterr;
   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
   return besterr;
 }
 
 static int upsampled_obmc_pref_error(const MACROBLOCKD *xd,
-                                     const int *mask, int mask_stride,
+                                     const int32_t *mask,
                                      const vp10_variance_fn_ptr_t *vfp,
-                                     const int *const wsrc,
-                                     const int wsrc_stride,
+                                     const int32_t *const wsrc,
                                      const uint8_t *const y, int y_stride,
                                      int w, int h, unsigned int *sse) {
   unsigned int besterr;
@@ -3191,8 +3186,7 @@ static int upsampled_obmc_pref_error(const MACROBLOCKD *xd,
     DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
     vpx_highbd_upsampled_pred(pred16, w, h, y, y_stride);
 
-    besterr = vfp->ovf(CONVERT_TO_BYTEPTR(pred16), w, wsrc, wsrc_stride,
-                       mask, mask_stride, sse);
+    besterr = vfp->ovf(CONVERT_TO_BYTEPTR(pred16), w, wsrc, mask, sse);
   } else {
     DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
 #else
@@ -3201,7 +3195,7 @@ static int upsampled_obmc_pref_error(const MACROBLOCKD *xd,
 #endif  // CONFIG_VP9_HIGHBITDEPTH
     vpx_upsampled_pred(pred, w, h, y, y_stride);
 
-    besterr = vfp->ovf(pred, w, wsrc, wsrc_stride, mask, mask_stride, sse);
+    besterr = vfp->ovf(pred, w, wsrc, mask, sse);
 #if CONFIG_VP9_HIGHBITDEPTH
   }
 #endif
@@ -3210,15 +3204,14 @@ static int upsampled_obmc_pref_error(const MACROBLOCKD *xd,
 
 static unsigned int upsampled_setup_obmc_center_error(
                         const MACROBLOCKD *xd,
-                        const int *mask, int mask_stride,
+                        const int32_t *mask,
                         const MV *bestmv, const MV *ref_mv,
                         int error_per_bit, const vp10_variance_fn_ptr_t *vfp,
-                        const int *const wsrc, const int wsrc_stride,
+                        const int32_t *const wsrc,
                         const uint8_t *const y, int y_stride,
                         int w, int h, int offset, int *mvjcost, int *mvcost[2],
                         unsigned int *sse1, int *distortion) {
-  unsigned int besterr = upsampled_obmc_pref_error(xd, mask, mask_stride, vfp,
-                                                   wsrc, wsrc_stride,
+  unsigned int besterr = upsampled_obmc_pref_error(xd, mask, vfp, wsrc,
                                                    y + offset, y_stride,
                                                    w, h, sse1);
   *distortion = besterr;
@@ -3227,8 +3220,8 @@ static unsigned int upsampled_setup_obmc_center_error(
 }
 
 int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x,
-                                          const int *wsrc, int wsrc_stride,
-                                          const int *mask, int mask_stride,
+                                          const int32_t *wsrc,
+                                          const int32_t *mask,
                                           int mi_row, int mi_col,
                                           MV *bestmv, const MV *ref_mv,
                                           int allow_hp, int error_per_bit,
@@ -3240,7 +3233,6 @@ int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x,
                                           int use_upsampled_ref) {
   const int *const z = wsrc;
   const int *const src_address = z;
-  const int src_stride = wsrc_stride;
   MACROBLOCKD *xd = &x->e_mbd;
   struct macroblockd_plane *const pd = &xd->plane[0];
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
@@ -3292,14 +3284,14 @@ int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x,
   // use_upsampled_ref can be 0 or 1
   if (use_upsampled_ref)
     besterr = upsampled_setup_obmc_center_error(
-        xd, mask, mask_stride, bestmv, ref_mv, error_per_bit,
-        vfp, z, src_stride, y, y_stride,
+        xd, mask, bestmv, ref_mv, error_per_bit,
+        vfp, z, y, y_stride,
         w, h, (offset << 3),
         mvjcost, mvcost, sse1, distortion);
   else
     besterr = setup_obmc_center_error(
-        mask, mask_stride, bestmv, ref_mv, error_per_bit,
-        vfp, z, src_stride, y, y_stride,
+        mask, bestmv, ref_mv, error_per_bit,
+        vfp, z, y, y_stride,
         offset, mvjcost, mvcost, sse1, distortion);
 
   for (iter = 0; iter < round; ++iter) {
@@ -3313,16 +3305,15 @@ int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x,
         if (use_upsampled_ref) {
           const uint8_t *const pre_address = y + tr * y_stride + tc;
 
-          thismse = upsampled_obmc_pref_error(xd, mask, mask_stride,
-                                              vfp, src_address, src_stride,
+          thismse = upsampled_obmc_pref_error(xd, mask,
+                                              vfp, src_address,
                                               pre_address, y_stride,
                                               w, h, &sse);
         } else {
           const uint8_t *const pre_address = y + (tr >> 3) * y_stride +
               (tc >> 3);
           thismse = vfp->osvf(pre_address, y_stride, sp(tc), sp(tr),
-                              src_address, src_stride,
-                              mask, mask_stride, &sse);
+                              src_address, mask, &sse);
         }
 
         cost_array[idx] = thismse +
@@ -3350,15 +3341,14 @@ int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x,
       if (use_upsampled_ref) {
         const uint8_t *const pre_address = y + tr * y_stride + tc;
 
-        thismse = upsampled_obmc_pref_error(xd, mask, mask_stride,
-                                            vfp, src_address, src_stride,
+        thismse = upsampled_obmc_pref_error(xd, mask, vfp, src_address,
                                             pre_address, y_stride,
                                             w, h, &sse);
       } else {
         const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
 
         thismse = vfp->osvf(pre_address, y_stride, sp(tc), sp(tr),
-                            src_address, src_stride, mask, mask_stride, &sse);
+                            src_address, mask, &sse);
       }
 
       cost_array[4] = thismse +
@@ -3422,8 +3412,8 @@ int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x,
 #undef CHECK_BETTER
 
 static int get_obmc_mvpred_var(const MACROBLOCK *x,
-                               const int *wsrc, int wsrc_stride,
-                               const int *mask, int mask_stride,
+                               const int32_t *wsrc,
+                               const int32_t *mask,
                                const MV *best_mv, const MV *center_mv,
                                const vp10_variance_fn_ptr_t *vfp,
                                int use_mvcost, int is_second) {
@@ -3433,14 +3423,14 @@ static int get_obmc_mvpred_var(const MACROBLOCK *x,
   unsigned int unused;
 
   return vfp->ovf(get_buf_from_mv(in_what, best_mv), in_what->stride,
-                  wsrc, wsrc_stride, mask, mask_stride, &unused) +
+                  wsrc, mask, &unused) +
          (use_mvcost ?  mv_err_cost(&mv, center_mv, x->nmvjointcost,
                                     x->mvcost, x->errorperbit) : 0);
 }
 
 int obmc_refining_search_sad(const MACROBLOCK *x,
-                             const int *wsrc, int wsrc_stride,
-                             const int *mask, int mask_stride,
+                             const int32_t *wsrc,
+                             const int32_t *mask,
                              MV *ref_mv, int error_per_bit,
                              int search_range,
                              const vp10_variance_fn_ptr_t *fn_ptr,
@@ -3450,8 +3440,7 @@ int obmc_refining_search_sad(const MACROBLOCK *x,
   const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
   unsigned int best_sad = fn_ptr->osdf(get_buf_from_mv(in_what, ref_mv),
-                                       in_what->stride,
-                                       wsrc, wsrc_stride, mask, mask_stride) +
+                                       in_what->stride, wsrc, mask) +
                          mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
   int i, j;
 
@@ -3463,8 +3452,7 @@ int obmc_refining_search_sad(const MACROBLOCK *x,
                      ref_mv->col + neighbors[j].col};
       if (is_mv_in(x, &mv)) {
         unsigned int sad = fn_ptr->osdf(get_buf_from_mv(in_what, &mv),
-                                        in_what->stride, wsrc, wsrc_stride,
-                                        mask, mask_stride);
+                                        in_what->stride, wsrc, mask);
         if (sad < best_sad) {
           sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
           if (sad < best_sad) {
@@ -3487,8 +3475,7 @@ int obmc_refining_search_sad(const MACROBLOCK *x,
 
 int obmc_diamond_search_sad(const MACROBLOCK *x,
                             const search_site_config *cfg,
-                            const int *wsrc, int wsrc_stride,
-                            const int *mask, int mask_stride,
+                            const int32_t *wsrc, const int32_t *mask,
                             MV *ref_mv, MV *best_mv,
                             int search_param,
                             int sad_per_bit, int *num00,
@@ -3516,8 +3503,7 @@ int obmc_diamond_search_sad(const MACROBLOCK *x,
   *best_mv = *ref_mv;
 
   // Check the starting position
-  best_sad = fn_ptr->osdf(best_address, in_what->stride,
-                          wsrc, wsrc_stride, mask, mask_stride) +
+  best_sad = fn_ptr->osdf(best_address, in_what->stride, wsrc, mask) +
              mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
 
   i = 1;
@@ -3528,7 +3514,7 @@ int obmc_diamond_search_sad(const MACROBLOCK *x,
                      best_mv->col + ss[i].mv.col};
       if (is_mv_in(x, &mv)) {
        int sad = fn_ptr->osdf(best_address + ss[i].offset, in_what->stride,
-                              wsrc, wsrc_stride, mask, mask_stride);
+                              wsrc, mask);
         if (sad < best_sad) {
           sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
           if (sad < best_sad) {
@@ -3552,8 +3538,7 @@ int obmc_diamond_search_sad(const MACROBLOCK *x,
                             best_mv->col + ss[best_site].mv.col};
         if (is_mv_in(x, &this_mv)) {
           int sad = fn_ptr->osdf(best_address + ss[best_site].offset,
-                                 in_what->stride, wsrc, wsrc_stride,
-                                 mask, mask_stride);
+                                 in_what->stride, wsrc, mask);
           if (sad < best_sad) {
             sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
             if (sad < best_sad) {
@@ -3576,8 +3561,7 @@ int obmc_diamond_search_sad(const MACROBLOCK *x,
 }
 
 int vp10_obmc_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
-                                 const int *wsrc, int wsrc_stride,
-                                 const int *mask, int mask_stride,
+                                 const int32_t *wsrc, const int32_t *mask,
                                  MV *mvp_full, int step_param,
                                  int sadpb, int further_steps, int do_refine,
                                  const vp10_variance_fn_ptr_t *fn_ptr,
@@ -3586,13 +3570,12 @@ int vp10_obmc_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
   MV temp_mv;
   int thissme, n, num00 = 0;
   int bestsme = obmc_diamond_search_sad(x, &cpi->ss_cfg,
-                                        wsrc, wsrc_stride,
-                                        mask, mask_stride,
+                                        wsrc, mask,
                                         mvp_full, &temp_mv,
                                         step_param, sadpb, &n,
                                         fn_ptr, ref_mv, is_second);
   if (bestsme < INT_MAX)
-    bestsme = get_obmc_mvpred_var(x, wsrc, wsrc_stride, mask, mask_stride,
+    bestsme = get_obmc_mvpred_var(x, wsrc, mask,
                                   &temp_mv, ref_mv, fn_ptr, 1, is_second);
   *dst_mv = temp_mv;
 
@@ -3608,13 +3591,12 @@ int vp10_obmc_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
       num00--;
     } else {
       thissme = obmc_diamond_search_sad(x, &cpi->ss_cfg,
-                                        wsrc, wsrc_stride,
-                                        mask, mask_stride,
+                                        wsrc, mask,
                                         mvp_full, &temp_mv,
                                         step_param + n, sadpb, &num00,
                                         fn_ptr, ref_mv, is_second);
       if (thissme < INT_MAX)
-        thissme = get_obmc_mvpred_var(x, wsrc, wsrc_stride, mask, mask_stride,
+        thissme = get_obmc_mvpred_var(x, wsrc, mask,
                                       &temp_mv, ref_mv, fn_ptr, 1, is_second);
 
       // check to see if refining search is needed.
@@ -3632,11 +3614,11 @@ int vp10_obmc_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
   if (do_refine) {
     const int search_range = 8;
     MV best_mv = *dst_mv;
-    thissme = obmc_refining_search_sad(x, wsrc, wsrc_stride, mask, mask_stride,
+    thissme = obmc_refining_search_sad(x, wsrc, mask,
                                        &best_mv, sadpb, search_range,
                                        fn_ptr, ref_mv, is_second);
     if (thissme < INT_MAX)
-      thissme = get_obmc_mvpred_var(x, wsrc, wsrc_stride, mask, mask_stride,
+      thissme = get_obmc_mvpred_var(x, wsrc, mask,
                                     &best_mv, ref_mv, fn_ptr, 1, is_second);
     if (thissme < bestsme) {
       bestsme = thissme;
diff --git a/vp10/encoder/mcomp.h b/vp10/encoder/mcomp.h
index 1b4e6130d..704e26c77 100644
--- a/vp10/encoder/mcomp.h
+++ b/vp10/encoder/mcomp.h
@@ -198,16 +198,16 @@ int vp10_masked_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x,
 
 #if CONFIG_OBMC
 int vp10_obmc_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x,
-                                 const int *wsrc, int wsrc_stride,
-                                 const int *mask, int mask_stride,
+                                 const int32_t *wsrc,
+                                 const int32_t *mask,
                                  MV *mvp_full, int step_param,
                                  int sadpb, int further_steps, int do_refine,
                                  const vp10_variance_fn_ptr_t *fn_ptr,
                                  const MV *ref_mv, MV *dst_mv,
                                  int is_second);
 int vp10_find_best_obmc_sub_pixel_tree_up(struct VP10_COMP *cpi, MACROBLOCK *x,
-                                          const int *wsrc, int wsrc_stride,
-                                          const int *mask, int mask_stride,
+                                          const int32_t *wsrc,
+                                          const int32_t *mask,
                                           int mi_row, int mi_col,
                                           MV *bestmv, const MV *ref_mv,
                                           int allow_hp, int error_per_bit,
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 25f67abd6..7cc65e65e 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -6073,8 +6073,7 @@ static INLINE void restore_dst_buf(MACROBLOCKD *xd,
 #if CONFIG_OBMC
 static void single_motion_search_obmc(VP10_COMP *cpi, MACROBLOCK *x,
                                       BLOCK_SIZE bsize, int mi_row, int mi_col,
-                                      const int* wsrc, int wsrc_stride,
-                                      const int* mask, int mask_stride,
+                                      const int32_t* wsrc, const int32_t* mask,
 #if CONFIG_EXT_INTER
                                       int ref_idx,
                                       int mv_idx,
@@ -6173,8 +6172,7 @@ static void single_motion_search_obmc(VP10_COMP *cpi, MACROBLOCK *x,
   mvp_full.col >>= 3;
   mvp_full.row >>= 3;
 
-  bestsme = vp10_obmc_full_pixel_diamond(cpi, x, wsrc, wsrc_stride,
-                                         mask, mask_stride,
+  bestsme = vp10_obmc_full_pixel_diamond(cpi, x, wsrc, mask,
                                          &mvp_full, step_param, sadpb,
                                          MAX_MVSEARCH_STEPS - 1 - step_param,
                                          1, &cpi->fn_ptr[bsize],
@@ -6188,8 +6186,7 @@ static void single_motion_search_obmc(VP10_COMP *cpi, MACROBLOCK *x,
   if (bestsme < INT_MAX) {
     int dis;
     vp10_find_best_obmc_sub_pixel_tree_up(cpi, x,
-                                          wsrc, wsrc_stride,
-                                          mask, mask_stride,
+                                          wsrc, mask,
                                           mi_row, mi_col,
                                           &tmp_mv->as_mv, &ref_mv,
                                           cm->allow_high_precision_mv,
@@ -6796,8 +6793,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
 #if CONFIG_OBMC
                                  uint8_t *dst_buf1[3], int dst_stride1[3],
                                  uint8_t *dst_buf2[3], int dst_stride2[3],
-                                 int *wsrc, int wsrc_strides,
-                                 int *mask2d, int mask2d_strides,
+                                 const int32_t *const wsrc,
+                                 const int32_t *const mask2d,
 #endif  // CONFIG_OBMC
 #if CONFIG_EXT_INTER
                                  int_mv single_newmvs[2][MAX_REF_FRAMES],
@@ -7739,8 +7736,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
 
         pred_mv.as_int = mbmi->mv[0].as_int;
         single_motion_search_obmc(cpi, x, bsize, mi_row, mi_col,
-                                  wsrc, wsrc_strides,
-                                  mask2d, mask2d_strides,
+                                  wsrc, mask2d,
 #if CONFIG_EXT_INTER
                                   0, mv_idx,
 #endif  // CONFIG_EXT_INTER
@@ -8494,13 +8490,11 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
   DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]);
   DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-  DECLARE_ALIGNED(16, int, weighted_src_buf[MAX_SB_SQUARE]);
-  DECLARE_ALIGNED(16, int, mask2d_buf[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(16, int32_t, weighted_src_buf[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(16, int32_t, mask2d_buf[MAX_SB_SQUARE]);
   uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
   int dst_stride1[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
   int dst_stride2[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
-  int weighted_src_stride = MAX_SB_SIZE;
-  int mask2d_stride = MAX_SB_SIZE;
 
 #if CONFIG_VP9_HIGHBITDEPTH
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -8605,8 +8599,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
   calc_target_weighted_pred(cm, x, xd, mi_row, mi_col,
                             dst_buf1[0], dst_stride1[0],
                             dst_buf2[0], dst_stride2[0],
-                            mask2d_buf, mask2d_stride,
-                            weighted_src_buf, weighted_src_stride);
+                            mask2d_buf, weighted_src_buf);
 #endif  // CONFIG_OBMC
 
   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
@@ -9143,8 +9136,8 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
 #if CONFIG_OBMC
                                   dst_buf1, dst_stride1,
                                   dst_buf2, dst_stride2,
-                                  weighted_src_buf, weighted_src_stride,
-                                  mask2d_buf, mask2d_stride,
+                                  weighted_src_buf,
+                                  mask2d_buf,
 #endif  // CONFIG_OBMC
 #if CONFIG_EXT_INTER
                                   single_newmvs,
@@ -9258,8 +9251,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
                                            dst_buf1, dst_stride1,
                                            dst_buf2, dst_stride2,
                                            weighted_src_buf,
-                                           weighted_src_stride,
-                                           mask2d_buf, mask2d_stride,
+                                           mask2d_buf,
 #endif  // CONFIG_OBMC
 #if CONFIG_EXT_INTER
                                            dummy_single_newmvs,
@@ -10970,14 +10962,16 @@ void calc_target_weighted_pred(VP10_COMMON *cm,
                                int mi_row, int mi_col,
                                uint8_t *above_buf, int above_stride,
                                uint8_t *left_buf,  int left_stride,
-                               int *mask_buf, int mask_stride,
-                               int *weighted_src_buf, int weighted_src_stride) {
+                               int32_t *mask_buf,
+                               int32_t *weighted_src_buf) {
   BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
   int row, col, i, mi_step;
   int bw = 8 * xd->n8_w;
   int bh = 8 * xd->n8_h;
-  int *dst = weighted_src_buf;
-  int *mask2d = mask_buf;
+  const int mask_stride = bw;
+  const int weighted_src_stride = bw;
+  int32_t *dst = weighted_src_buf;
+  int32_t *mask2d = mask_buf;
   uint8_t *src;
 #if CONFIG_VP9_HIGHBITDEPTH
   int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
@@ -11009,11 +11003,11 @@ void calc_target_weighted_pred(VP10_COMMON *cm,
         int bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
         int bh = overlap >> pd->subsampling_y;
         int dst_stride = weighted_src_stride;
-        int *dst = weighted_src_buf + (i * MI_SIZE >> pd->subsampling_x);
+        int32_t *dst = weighted_src_buf + (i * MI_SIZE >> pd->subsampling_x);
         int tmp_stride = above_stride;
         uint8_t *tmp = above_buf + (i * MI_SIZE >> pd->subsampling_x);
         int mask2d_stride = mask_stride;
-        int *mask2d = mask_buf + (i * MI_SIZE >> pd->subsampling_x);
+        int32_t *mask2d = mask_buf + (i * MI_SIZE >> pd->subsampling_x);
         const uint8_t *mask1d[2];
 
         setup_obmc_mask(bh, mask1d);
@@ -11078,14 +11072,14 @@ void calc_target_weighted_pred(VP10_COMMON *cm,
         int bw = overlap >> pd->subsampling_x;
         int bh = (mi_step * MI_SIZE) >> pd->subsampling_y;
         int dst_stride = weighted_src_stride;
-        int *dst = weighted_src_buf +
+        int32_t *dst = weighted_src_buf +
                    (i * MI_SIZE * dst_stride >> pd->subsampling_y);
         int tmp_stride = left_stride;
         uint8_t *tmp = left_buf +
                        (i * MI_SIZE * tmp_stride >> pd->subsampling_y);
         int mask2d_stride = mask_stride;
-        int *mask2d = mask_buf +
-                      (i * MI_SIZE * mask2d_stride >> pd->subsampling_y);
+        int32_t *mask2d = mask_buf +
+                          (i * MI_SIZE * mask2d_stride >> pd->subsampling_y);
         const uint8_t *mask1d[2];
 
         setup_obmc_mask(bw, mask1d);
diff --git a/vp10/encoder/rdopt.h b/vp10/encoder/rdopt.h
index be6227b09..b660e2376 100644
--- a/vp10/encoder/rdopt.h
+++ b/vp10/encoder/rdopt.h
@@ -97,8 +97,7 @@ void calc_target_weighted_pred(VP10_COMMON *cm,
                                int mi_row, int mi_col,
                                uint8_t *above_buf, int above_stride,
                                uint8_t *left_buf, int left_stride,
-                               int *mask_buf, int mask_stride,
-                               int *weighted_src_buf, int weighted_src_stride);
+                               int32_t *mask_buf, int32_t *weighted_src_buf);
 #endif  // CONFIG_OBMC
 
 #ifdef __cplusplus
diff --git a/vpx_dsp/sad.c b/vpx_dsp/sad.c
index b3ed41050..bb1daf823 100644
--- a/vpx_dsp/sad.c
+++ b/vpx_dsp/sad.c
@@ -456,21 +456,19 @@ HIGHBD_MASKSADMXN(4, 4)
 // b: target weighted prediction (has been *4096 to keep precision)
 // m: 2d weights (scaled by 4096)
 static INLINE unsigned int obmc_sad(const uint8_t *a, int a_stride,
-                                    const int *b, int b_stride,
-                                    const int *m, int m_stride,
+                                    const int32_t *b,
+                                    const int32_t *m,
                                     int width, int height) {
   int y, x;
   unsigned int sad = 0;
 
   for (y = 0; y < height; y++) {
-    for (x = 0; x < width; x++) {
-      int abs_diff = abs(b[x] - a[x] * m[x]);
-      sad += (abs_diff + 2048) >> 12;
-    }
+    for (x = 0; x < width; x++)
+      sad += ROUND_POWER_OF_TWO(abs(b[x] - a[x] * m[x]), 12);
 
     a += a_stride;
-    b += b_stride;
-    m += m_stride;
+    b += width;
+    m += width;
   }
 
   return sad;
@@ -478,9 +476,9 @@ static INLINE unsigned int obmc_sad(const uint8_t *a, int a_stride,
 
 #define OBMCSADMxN(m, n)                                                      \
 unsigned int vpx_obmc_sad##m##x##n##_c(const uint8_t *ref, int ref_stride,    \
-                                       const int *wsrc, int wsrc_stride,      \
-                                       const int *msk, int msk_stride) {      \
-  return obmc_sad(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride, m, n); \
+                                       const int32_t *wsrc,                   \
+                                       const int32_t *msk) {                  \
+  return obmc_sad(ref, ref_stride, wsrc, msk, m, n);                          \
 }
 
 #if CONFIG_EXT_PARTITION
@@ -504,22 +502,20 @@ OBMCSADMxN(4, 4)
 
 #if CONFIG_VP9_HIGHBITDEPTH
 static INLINE unsigned int highbd_obmc_sad(const uint8_t *a8, int a_stride,
-                                           const int *b, int b_stride,
-                                           const int *m, int m_stride,
+                                           const int32_t *b,
+                                           const int32_t *m,
                                            int width, int height) {
   int y, x;
   unsigned int sad = 0;
   const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
 
   for (y = 0; y < height; y++) {
-    for (x = 0; x < width; x++) {
-      int abs_diff = abs(b[x] - a[x] * m[x]);
-      sad += (abs_diff + 2048) >> 12;
-    }
+    for (x = 0; x < width; x++)
+      sad += ROUND_POWER_OF_TWO(abs(b[x] - a[x] * m[x]), 12);
 
     a += a_stride;
-    b += b_stride;
-    m += m_stride;
+    b += width;
+    m += width;
   }
 
   return sad;
@@ -528,12 +524,9 @@ static INLINE unsigned int highbd_obmc_sad(const uint8_t *a8, int a_stride,
 #define HIGHBD_OBMCSADMXN(m, n)                                               \
 unsigned int vpx_highbd_obmc_sad##m##x##n##_c(const uint8_t *ref,             \
                                               int ref_stride,                 \
-                                              const int *wsrc,                \
-                                              int wsrc_stride,                \
-                                              const int *msk,                 \
-                                              int msk_stride) {               \
-  return highbd_obmc_sad(ref, ref_stride, wsrc, wsrc_stride,                  \
-                         msk, msk_stride, m, n);                              \
+                                              const int32_t *wsrc,            \
+                                              const int32_t *msk) {           \
+  return highbd_obmc_sad(ref, ref_stride, wsrc, msk, m, n);                   \
 }
 
 #if CONFIG_EXT_PARTITION
diff --git a/vpx_dsp/variance.c b/vpx_dsp/variance.c
index ab3d8bbc1..b9f0e32fd 100644
--- a/vpx_dsp/variance.c
+++ b/vpx_dsp/variance.c
@@ -1026,8 +1026,8 @@ HIGHBD_MASK_SUBPIX_VAR(128, 128)
 
 #if CONFIG_VP10 && CONFIG_OBMC
 void obmc_variance(const uint8_t *a, int  a_stride,
-                   const int *b, int  b_stride,
-                   const int *m, int  m_stride,
+                   const int32_t *b,
+                   const int32_t *m,
                    int w, int h, unsigned int *sse, int *sum) {
   int i, j;
 
@@ -1036,26 +1036,24 @@ void obmc_variance(const uint8_t *a, int  a_stride,
 
   for (i = 0; i < h; i++) {
     for (j = 0; j < w; j++) {
-      int scaled_diff = b[j] - a[j] * m[j];
-      int abs_diff = (abs(scaled_diff) + 2048) >> 12;
-      int diff = (scaled_diff >= 0) ? abs_diff : -abs_diff;
+      int diff = ROUND_POWER_OF_TWO_SIGNED(b[j] - a[j] * m[j], 12);
       *sum += diff;
       *sse += diff * diff;
     }
 
     a += a_stride;
-    b += b_stride;
-    m += m_stride;
+    b += w;
+    m += w;
   }
 }
 
 #define OBMC_VAR(W, H) \
 unsigned int vpx_obmc_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
-                                            const int *b, int b_stride, \
-                                            const int *m, int m_stride, \
+                                            const int32_t *b, \
+                                            const int32_t *m, \
                                             unsigned int *sse) { \
   int sum; \
-  obmc_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, &sum); \
+  obmc_variance(a, a_stride, b, m, W, H, sse, &sum); \
   return *sse - (((int64_t)sum * sum) / (W * H)); \
 }
 
@@ -1063,8 +1061,8 @@ unsigned int vpx_obmc_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
 unsigned int vpx_obmc_sub_pixel_variance##W##x##H##_c(                        \
                                         const uint8_t *pre, int pre_stride,   \
                                         int xoffset, int  yoffset,            \
-                                        const int *wsrc, int wsrc_stride,     \
-                                        const int *msk, int msk_stride,       \
+                                        const int32_t *wsrc,                 \
+                                        const int32_t *msk,                   \
                                         unsigned int *sse) {                  \
   uint16_t fdata3[(H + 1) * W];                                               \
   uint8_t temp2[H * W];                                                       \
@@ -1074,8 +1072,7 @@ unsigned int vpx_obmc_sub_pixel_variance##W##x##H##_c(                        \
   var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,               \
                                      bilinear_filters_2t[yoffset]);           \
                                                                               \
-  return vpx_obmc_variance##W##x##H##_c(temp2, W, wsrc, wsrc_stride,          \
-                                        msk, msk_stride, sse);                \
+  return vpx_obmc_variance##W##x##H##_c(temp2, W, wsrc, msk, sse);            \
 }
 
 OBMC_VAR(4, 4)
@@ -1130,8 +1127,7 @@ OBMC_SUBPIX_VAR(128, 128)
 
 #if CONFIG_VP9_HIGHBITDEPTH
 void highbd_obmc_variance64(const uint8_t *a8, int  a_stride,
-                            const int *b, int  b_stride,
-                            const int *m, int  m_stride,
+                            const int32_t *b, const int32_t *m,
                             int w, int h, uint64_t *sse, int64_t *sum) {
   int i, j;
   uint16_t *a = CONVERT_TO_SHORTPTR(a8);
@@ -1141,51 +1137,43 @@ void highbd_obmc_variance64(const uint8_t *a8, int  a_stride,
 
   for (i = 0; i < h; i++) {
     for (j = 0; j < w; j++) {
-      int scaled_diff = b[j] - a[j] * m[j];
-      int abs_diff = (abs(scaled_diff) + 2048) >> 12;
-      int diff = (scaled_diff >= 0) ? abs_diff : -abs_diff;
+      int diff = ROUND_POWER_OF_TWO_SIGNED(b[j] - a[j] * m[j], 12);
       *sum += diff;
       *sse += diff * diff;
     }
 
     a += a_stride;
-    b += b_stride;
-    m += m_stride;
+    b += w;
+    m += w;
   }
 }
 
 void highbd_obmc_variance(const uint8_t *a8, int  a_stride,
-                          const int *b, int  b_stride,
-                          const int *m, int  m_stride,
-                          int  w, int  h, unsigned int *sse, int *sum) {
+                          const int32_t *b, const int32_t *m,
+                          int w, int h, unsigned int *sse, int *sum) {
   int64_t sum64;
   uint64_t sse64;
-  highbd_obmc_variance64(a8, a_stride, b, b_stride, m, m_stride,
-                         w, h, &sse64, &sum64);
+  highbd_obmc_variance64(a8, a_stride, b, m, w, h, &sse64, &sum64);
   *sum = (int)sum64;
   *sse = (unsigned int)sse64;
 }
 
 void highbd_10_obmc_variance(const uint8_t *a8, int  a_stride,
-                             const int *b, int  b_stride,
-                             const int *m, int  m_stride,
-                             int  w, int  h, unsigned int *sse, int *sum) {
+                             const int32_t *b, const int32_t *m,
+                             int w, int h, unsigned int *sse, int *sum) {
   int64_t sum64;
   uint64_t sse64;
-  highbd_obmc_variance64(a8, a_stride, b, b_stride, m, m_stride,
-                         w, h, &sse64, &sum64);
+  highbd_obmc_variance64(a8, a_stride, b, m, w, h, &sse64, &sum64);
   *sum = (int)ROUND_POWER_OF_TWO(sum64, 2);
   *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 4);
 }
 
 void highbd_12_obmc_variance(const uint8_t *a8, int  a_stride,
-                             const int *b, int  b_stride,
-                             const int *m, int  m_stride,
-                             int  w, int  h, unsigned int *sse, int *sum) {
+                             const int32_t *b, const int32_t *m,
+                             int w, int h, unsigned int *sse, int *sum) {
   int64_t sum64;
   uint64_t sse64;
-  highbd_obmc_variance64(a8, a_stride, b, b_stride, m, m_stride,
-                         w, h, &sse64, &sum64);
+  highbd_obmc_variance64(a8, a_stride, b, m, w, h, &sse64, &sum64);
   *sum = (int)ROUND_POWER_OF_TWO(sum64, 4);
   *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8);
 }
@@ -1193,40 +1181,31 @@ void highbd_12_obmc_variance(const uint8_t *a8, int  a_stride,
 #define HIGHBD_OBMC_VAR(W, H)                                                 \
 unsigned int vpx_highbd_obmc_variance##W##x##H##_c(const uint8_t *a,          \
                                                    int a_stride,              \
-                                                   const int *b,              \
-                                                   int b_stride,              \
-                                                   const int *m,              \
-                                                   int m_stride,              \
+                                                   const int32_t *b,          \
+                                                   const int32_t *m,          \
                                                    unsigned int *sse) {       \
   int sum;                                                                    \
-  highbd_obmc_variance(a, a_stride, b, b_stride, m, m_stride,                 \
-                       W, H, sse, &sum);                                      \
+  highbd_obmc_variance(a, a_stride, b, m, W, H, sse, &sum);                   \
   return *sse - (((int64_t)sum * sum) / (W * H));                             \
 }                                                                             \
                                                                               \
 unsigned int vpx_highbd_10_obmc_variance##W##x##H##_c(const uint8_t *a,       \
                                                       int a_stride,           \
-                                                      const int *b,           \
-                                                      int b_stride,           \
-                                                      const int *m,           \
-                                                      int m_stride,           \
+                                                      const int32_t *b,       \
+                                                      const int32_t *m,       \
                                                       unsigned int *sse) {    \
   int sum;                                                                    \
-  highbd_10_obmc_variance(a, a_stride, b, b_stride, m, m_stride,              \
-                          W, H, sse, &sum);                                   \
+  highbd_10_obmc_variance(a, a_stride, b, m, W, H, sse, &sum);                \
   return *sse - (((int64_t)sum * sum) / (W * H));                             \
 }                                                                             \
                                                                               \
 unsigned int vpx_highbd_12_obmc_variance##W##x##H##_c(const uint8_t *a,       \
                                                       int a_stride,           \
-                                                      const int *b,           \
-                                                      int b_stride,           \
-                                                      const int *m,           \
-                                                      int m_stride,           \
+                                                      const int32_t *b,       \
+                                                      const int32_t *m,       \
                                                       unsigned int *sse) {    \
   int sum;                                                                    \
-  highbd_12_obmc_variance(a, a_stride, b, b_stride, m, m_stride,              \
-                          W, H, sse, &sum);                                   \
+  highbd_12_obmc_variance(a, a_stride, b, m, W, H, sse, &sum);                \
   return *sse - (((int64_t)sum * sum) / (W * H));                             \
 }
 
@@ -1234,8 +1213,8 @@ unsigned int vpx_highbd_12_obmc_variance##W##x##H##_c(const uint8_t *a,       \
 unsigned int vpx_highbd_obmc_sub_pixel_variance##W##x##H##_c(                 \
                                         const uint8_t *pre, int pre_stride,   \
                                         int xoffset, int  yoffset,            \
-                                        const int *wsrc, int wsrc_stride,     \
-                                        const int *msk, int msk_stride,       \
+                                        const int32_t *wsrc,                  \
+                                        const int32_t *msk,                   \
                                         unsigned int *sse) {                  \
   uint16_t fdata3[(H + 1) * W];                                               \
   uint16_t temp2[H * W];                                                      \
@@ -1247,15 +1226,14 @@ unsigned int vpx_highbd_obmc_sub_pixel_variance##W##x##H##_c(                 \
                                                bilinear_filters_2t[yoffset]); \
                                                                               \
   return vpx_highbd_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2),     \
-                                               W, wsrc, wsrc_stride,          \
-                                               msk, msk_stride, sse);         \
+                                               W, wsrc, msk, sse);            \
 }                                                                             \
                                                                               \
 unsigned int vpx_highbd_10_obmc_sub_pixel_variance##W##x##H##_c(              \
                                         const uint8_t *pre, int pre_stride,   \
                                         int xoffset, int  yoffset,            \
-                                        const int *wsrc, int wsrc_stride,     \
-                                        const int *msk, int msk_stride,       \
+                                        const int32_t *wsrc,                  \
+                                        const int32_t *msk,                   \
                                         unsigned int *sse) {                  \
   uint16_t fdata3[(H + 1) * W];                                               \
   uint16_t temp2[H * W];                                                      \
@@ -1267,15 +1245,14 @@ unsigned int vpx_highbd_10_obmc_sub_pixel_variance##W##x##H##_c(              \
                                                bilinear_filters_2t[yoffset]); \
                                                                               \
   return vpx_highbd_10_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2),  \
-                                                  W, wsrc, wsrc_stride,       \
-                                                  msk, msk_stride, sse);      \
+                                                  W, wsrc, msk, sse);         \
 }                                                                             \
                                                                               \
 unsigned int vpx_highbd_12_obmc_sub_pixel_variance##W##x##H##_c(              \
                                         const uint8_t *pre, int pre_stride,   \
                                         int xoffset, int  yoffset,            \
-                                        const int *wsrc, int wsrc_stride,     \
-                                        const int *msk, int msk_stride,       \
+                                        const int32_t *wsrc,                  \
+                                        const int32_t *msk,                   \
                                         unsigned int *sse) {                  \
   uint16_t fdata3[(H + 1) * W];                                               \
   uint16_t temp2[H * W];                                                      \
@@ -1287,8 +1264,7 @@ unsigned int vpx_highbd_12_obmc_sub_pixel_variance##W##x##H##_c(              \
                                                bilinear_filters_2t[yoffset]); \
                                                                               \
   return vpx_highbd_12_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2),  \
-                                                  W, wsrc, wsrc_stride,       \
-                                                  msk, msk_stride, sse);      \
+                                                  W, wsrc, msk, sse);         \
 }
 
 HIGHBD_OBMC_VAR(4, 4)
diff --git a/vpx_dsp/variance.h b/vpx_dsp/variance.h
index 88ab5e3a1..aaef8c03e 100644
--- a/vpx_dsp/variance.h
+++ b/vpx_dsp/variance.h
@@ -101,24 +101,18 @@ typedef unsigned int (*vpx_masked_subpixvariance_fn_t)(const uint8_t *src,
 #if CONFIG_VP10 && CONFIG_OBMC
 typedef unsigned int(*vpx_obmc_sad_fn_t)(const uint8_t *pred,
                                          int pred_stride,
-                                         const int *wsrc,
-                                         int wsrc_stride,
-                                         const int *msk,
-                                         int msk_stride);
+                                         const int32_t *wsrc,
+                                         const int32_t *msk);
 typedef unsigned int (*vpx_obmc_variance_fn_t)(const uint8_t *pred,
                                                int pred_stride,
-                                               const int *wsrc,
-                                               int wsrc_stride,
-                                               const int *msk,
-                                               int msk_stride,
+                                               const int32_t *wsrc,
+                                               const int32_t *msk,
                                                unsigned int *sse);
 typedef unsigned int (*vpx_obmc_subpixvariance_fn_t)(const uint8_t *pred,
                                                      int pred_stride,
                                                      int xoffset, int yoffset,
-                                                     const int *wsrc,
-                                                     int wsrc_stride,
-                                                     const int *msk,
-                                                     int msk_stride,
+                                                     const int32_t *wsrc,
+                                                     const int32_t *msk,
                                                      unsigned int *sse);
 #endif  // CONFIG_VP10 && CONFIG_OBMC
 
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 2fb61f103..4944b2ece 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -1103,14 +1103,14 @@ if (vpx_config("CONFIG_EXT_INTER") eq "yes") {
 if (vpx_config("CONFIG_OBMC") eq "yes") {
   foreach (@block_sizes) {
     ($w, $h) = @$_;
-    add_proto qw/unsigned int/, "vpx_obmc_sad${w}x${h}", "const uint8_t *ref_ptr, int ref_stride, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride";
+    add_proto qw/unsigned int/, "vpx_obmc_sad${w}x${h}", "const uint8_t *ref_ptr, int ref_stride, const int32_t *wsrc_ptr, const int32_t *mask";
     specialize "vpx_obmc_sad${w}x${h}";
   }
 
   if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
     foreach (@block_sizes) {
       ($w, $h) = @$_;
-      add_proto qw/unsigned int/, "vpx_highbd_obmc_sad${w}x${h}", "const uint8_t *ref_ptr, int ref_stride, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride";
+      add_proto qw/unsigned int/, "vpx_highbd_obmc_sad${w}x${h}", "const uint8_t *ref_ptr, int ref_stride, const int32_t *wsrc_ptr, const int32_t *mask";
       specialize "vpx_highbd_obmc_sad${w}x${h}";
     }
   }
@@ -1400,8 +1400,8 @@ if (vpx_config("CONFIG_EXT_INTER") eq "yes") {
 if (vpx_config("CONFIG_OBMC") eq "yes") {
   foreach (@block_sizes) {
     ($w, $h) = @$_;
-    add_proto qw/unsigned int/, "vpx_obmc_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride, unsigned int *sse";
-    add_proto qw/unsigned int/, "vpx_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, int xoffset, int  yoffset, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride, unsigned int *sse";
+    add_proto qw/unsigned int/, "vpx_obmc_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, const int32_t *wsrc_ptr, const int32_t *mask, unsigned int *sse";
+    add_proto qw/unsigned int/, "vpx_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, int xoffset, int  yoffset, const int32_t *wsrc_ptr, const int32_t *mask, unsigned int *sse";
     specialize "vpx_obmc_variance${w}x${h}";
     specialize "vpx_obmc_sub_pixel_variance${w}x${h}";
   }
@@ -1410,8 +1410,8 @@ if (vpx_config("CONFIG_OBMC") eq "yes") {
     foreach $bd ("_", "_10_", "_12_") {
       foreach (@block_sizes) {
         ($w, $h) = @$_;
-        add_proto qw/unsigned int/, "vpx_highbd${bd}obmc_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride, unsigned int *sse";
-        add_proto qw/unsigned int/, "vpx_highbd${bd}obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, int xoffset, int  yoffset, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride, unsigned int *sse";
+        add_proto qw/unsigned int/, "vpx_highbd${bd}obmc_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, const int32_t *wsrc_ptr, const int32_t *mask, unsigned int *sse";
+        add_proto qw/unsigned int/, "vpx_highbd${bd}obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, int xoffset, int  yoffset, const int32_t *wsrc_ptr, const int32_t *mask, unsigned int *sse";
         specialize "vpx_highbd${bd}obmc_variance${w}x${h}";
         specialize "vpx_highbd${bd}obmc_sub_pixel_variance${w}x${h}";
       }