Clean CONVERT_TO_BYTEPTR/SHORTPTR in convolve

author Linfeng Zhang <linfengz@google.com>
Thu, 6 Apr 2017 00:54:42 +0000 (17:54 -0700)
committer Linfeng Zhang <linfengz@google.com>
Wed, 19 Apr 2017 19:13:49 +0000 (12:13 -0700)
diff --git a/test/convolve_test.cc b/test/convolve_test.cc

index 8b339fadff5b9ed563895633b7f0dfc9fd253b52..a8bab408231fe47b3e09b0e26223a81da88383d1 100644 (file)
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -301,9 +301,9 @@ void wrapper_filter_average_block2d_8_c(
      filter_average_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
                                 dst_stride, output_width, output_height);
    } else {
-    highbd_filter_average_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
+    highbd_filter_average_block2d_8_c(CAST_TO_SHORTPTR(src_ptr), src_stride,
                                        hfilter, vfilter,
-                                      CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
+                                      CAST_TO_SHORTPTR(dst_ptr), dst_stride,
                                        output_width, output_height, use_highbd);
    }
  #else
@@ -324,8 +324,8 @@ void wrapper_filter_block2d_8_c(const uint8_t *src_ptr,
      filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
                         dst_stride, output_width, output_height);
    } else {
-    highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride, hfilter,
-                              vfilter, CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
+    highbd_filter_block2d_8_c(CAST_TO_SHORTPTR(src_ptr), src_stride, hfilter,
+                              vfilter, CAST_TO_SHORTPTR(dst_ptr), dst_stride,
                                output_width, output_height, use_highbd);
    }
  #else
@@ -460,7 +460,7 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
      if (UUT_->use_highbd_ == 0) {
        return input_ + offset;
      } else {
-      return CONVERT_TO_BYTEPTR(input16_) + offset;
+      return CAST_TO_BYTEPTR(input16_ + offset);
      }
  #else
      return input_ + offset;
@@ -473,7 +473,7 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
      if (UUT_->use_highbd_ == 0) {
        return output_ + offset;
      } else {
-      return CONVERT_TO_BYTEPTR(output16_) + offset;
+      return CAST_TO_BYTEPTR(output16_ + offset);
      }
  #else
      return output_ + offset;
@@ -486,7 +486,7 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
      if (UUT_->use_highbd_ == 0) {
        return output_ref_ + offset;
      } else {
-      return CONVERT_TO_BYTEPTR(output16_ref_) + offset;
+      return CAST_TO_BYTEPTR(output16_ref_ + offset);
      }
  #else
      return output_ref_ + offset;
@@ -498,7 +498,7 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
      if (UUT_->use_highbd_ == 0) {
        return list[index];
      } else {
-      return CONVERT_TO_SHORTPTR(list)[index];
+      return CAST_TO_SHORTPTR(list)[index];
      }
  #else
      return list[index];
@@ -510,7 +510,7 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
      if (UUT_->use_highbd_ == 0) {
        list[index] = (uint8_t)val;
      } else {
-      CONVERT_TO_SHORTPTR(list)[index] = val;
+      CAST_TO_SHORTPTR(list)[index] = val;
      }
  #else
      list[index] = (uint8_t)val;
@@ -718,7 +718,7 @@ TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
      if (UUT_->use_highbd_ == 0) {
        ref = ref8;
      } else {
-      ref = CONVERT_TO_BYTEPTR(ref16);
+      ref = CAST_TO_BYTEPTR(ref16);
      }
  #else
      uint8_t ref[kOutputStride * kMaxDimension];
@@ -797,7 +797,7 @@ TEST_P(ConvolveTest, FilterExtremes) {
    if (UUT_->use_highbd_ == 0) {
      ref = ref8;
    } else {
-    ref = CONVERT_TO_BYTEPTR(ref16);
+    ref = CAST_TO_BYTEPTR(ref16);
    }
  #else
    uint8_t ref[kOutputStride * kMaxDimension];
diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h

index 4fed4f7f6ecf9367118969692463ea19c63a5d45..cb7d1c63a23a3847106325bedd03da8f5a8d517c 100644 (file)
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@@ -37,8 +37,9 @@ static INLINE void highbd_inter_predictor(
      const int subpel_x, const int subpel_y, const struct scale_factors *sf,
      int w, int h, int ref, const InterpKernel *kernel, int xs, int ys, int bd) {
    sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
-      src, src_stride, dst, dst_stride, kernel[subpel_x], xs, kernel[subpel_y],
-      ys, w, h, bd);
+      CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(src)), src_stride,
+      CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst)), dst_stride, kernel[subpel_x],
+      xs, kernel[subpel_y], ys, w, h, bd);
  }
  #endif  // CONFIG_VP9_HIGHBITDEPTH
  
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c

index 591a85ee03212b182e973a5cdee6230c7f0e0617..ebe6758c8b93620712fe4aa5e781b39f13a259f2 100644 (file)
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2417,10 +2417,11 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
          uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor);
  
          if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
-          vpx_highbd_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
-                               kernel[x_q4 & 0xf], 16 * src_w / dst_w,
-                               kernel[y_q4 & 0xf], 16 * src_h / dst_h,
-                               16 / factor, 16 / factor, bd);
+          vpx_highbd_convolve8(
+              CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(src_ptr)), src_stride,
+              CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst_ptr)), dst_stride,
+              kernel[x_q4 & 0xf], 16 * src_w / dst_w, kernel[y_q4 & 0xf],
+              16 * src_h / dst_h, 16 / factor, 16 / factor, bd);
          } else {
            vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride,
                          kernel[x_q4 & 0xf], 16 * src_w / dst_w,
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c

index db2bbe7c272cf1e2f622282f87990aa973b955a3..f177814d6f0226a86d0750f6bf95c8c4d96f49c7 100644 (file)
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -2053,9 +2053,11 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
          this_mode_pred = &tmp[get_pred_buffer(tmp, 3)];
  #if CONFIG_VP9_HIGHBITDEPTH
          if (cm->use_highbitdepth)
-          vpx_highbd_convolve_copy(best_pred->data, best_pred->stride,
-                                   this_mode_pred->data, this_mode_pred->stride,
-                                   NULL, 0, NULL, 0, bw, bh, xd->bd);
+          vpx_highbd_convolve_copy(
+              CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(best_pred->data)),
+              best_pred->stride,
+              CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(this_mode_pred->data)),
+              this_mode_pred->stride, NULL, 0, NULL, 0, bw, bh, xd->bd);
          else
            vpx_convolve_copy(best_pred->data, best_pred->stride,
                              this_mode_pred->data, this_mode_pred->stride, NULL,
@@ -2162,9 +2164,11 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
      if (best_pred->data != orig_dst.buf && is_inter_mode(mi->mode)) {
  #if CONFIG_VP9_HIGHBITDEPTH
        if (cm->use_highbitdepth)
-        vpx_highbd_convolve_copy(best_pred->data, best_pred->stride,
-                                 pd->dst.buf, pd->dst.stride, NULL, 0, NULL, 0,
-                                 bw, bh, xd->bd);
+        vpx_highbd_convolve_copy(
+            CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(best_pred->data)),
+            best_pred->stride,
+            CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(pd->dst.buf)), pd->dst.stride,
+            NULL, 0, NULL, 0, bw, bh, xd->bd);
        else
          vpx_convolve_copy(best_pred->data, best_pred->stride, pd->dst.buf,
                            pd->dst.stride, NULL, 0, NULL, 0, bw, bh);
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c

index d23d324466d7db622e9f62bc9133e173d5cb37a2..2278ddc0fb7f1282fc89a4a2f425dce9a55df8a8 100644 (file)
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -599,9 +599,10 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
  
  #if CONFIG_VP9_HIGHBITDEPTH
        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-        recon = CONVERT_TO_BYTEPTR(recon);
-        vpx_highbd_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, NULL, 0,
-                                 bs, bs, xd->bd);
+        vpx_highbd_convolve_copy(CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst)),
+                                 dst_stride, recon, 32, NULL, 0, NULL, 0, bs,
+                                 bs, xd->bd);
+        recon = CONVERT_TO_BYTEPTR(recon16);
          if (xd->lossless) {
            vp9_highbd_iwht4x4_add(dqcoeff, recon, 32, *eob, xd->bd);
          } else {
diff --git a/vpx_dsp/arm/highbd_vpx_convolve8_neon.c b/vpx_dsp/arm/highbd_vpx_convolve8_neon.c

index 1fde13e8d6d6735dff5f26e2593bade3939cb86c..a00aa0444664b8fc206e5ac631f7a3f02e9d190d 100644 (file)
--- a/vpx_dsp/arm/highbd_vpx_convolve8_neon.c
+++ b/vpx_dsp/arm/highbd_vpx_convolve8_neon.c
@@ -145,8 +145,8 @@ void vpx_highbd_convolve8_horiz_neon(const uint8_t *src8, ptrdiff_t src_stride,
      vpx_highbd_convolve8_horiz_c(src8, src_stride, dst8, dst_stride, filter_x,
                                   x_step_q4, filter_y, y_step_q4, w, h, bd);
    } else {
-    const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-    uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+    const uint16_t *src = CAST_TO_SHORTPTR(src8);
+    uint16_t *dst = CAST_TO_SHORTPTR(dst8);
      const int16x8_t filters = vld1q_s16(filter_x);
      const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
      uint16x8_t t0, t1, t2, t3;
@@ -348,8 +348,8 @@ void vpx_highbd_convolve8_avg_horiz_neon(const uint8_t *src8,
                                       filter_x, x_step_q4, filter_y, y_step_q4,
                                       w, h, bd);
    } else {
-    const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-    uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+    const uint16_t *src = CAST_TO_SHORTPTR(src8);
+    uint16_t *dst = CAST_TO_SHORTPTR(dst8);
      const int16x8_t filters = vld1q_s16(filter_x);
      const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
      uint16x8_t t0, t1, t2, t3;
@@ -579,8 +579,8 @@ void vpx_highbd_convolve8_vert_neon(const uint8_t *src8, ptrdiff_t src_stride,
      vpx_highbd_convolve8_vert_c(src8, src_stride, dst8, dst_stride, filter_x,
                                  x_step_q4, filter_y, y_step_q4, w, h, bd);
    } else {
-    const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-    uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+    const uint16_t *src = CAST_TO_SHORTPTR(src8);
+    uint16_t *dst = CAST_TO_SHORTPTR(dst8);
      const int16x8_t filters = vld1q_s16(filter_y);
      const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
  
@@ -748,8 +748,8 @@ void vpx_highbd_convolve8_avg_vert_neon(const uint8_t *src8,
                                      filter_x, x_step_q4, filter_y, y_step_q4, w,
                                      h, bd);
    } else {
-    const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-    uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+    const uint16_t *src = CAST_TO_SHORTPTR(src8);
+    uint16_t *dst = CAST_TO_SHORTPTR(dst8);
      const int16x8_t filters = vld1q_s16(filter_y);
      const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
  
diff --git a/vpx_dsp/arm/highbd_vpx_convolve_avg_neon.c b/vpx_dsp/arm/highbd_vpx_convolve_avg_neon.c

index f4d70761eb330a391f05d8040296a66fadee41ec..b244caea9c5e481def00456ade862edb48d27d32 100644 (file)
--- a/vpx_dsp/arm/highbd_vpx_convolve_avg_neon.c
+++ b/vpx_dsp/arm/highbd_vpx_convolve_avg_neon.c
@@ -18,8 +18,8 @@ void vpx_highbd_convolve_avg_neon(const uint8_t *src8, ptrdiff_t src_stride,
                                    const int16_t *filter_x, int filter_x_stride,
                                    const int16_t *filter_y, int filter_y_stride,
                                    int w, int h, int bd) {
-  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+  const uint16_t *src = CAST_TO_SHORTPTR(src8);
+  uint16_t *dst = CAST_TO_SHORTPTR(dst8);
  
    (void)filter_x;
    (void)filter_x_stride;
diff --git a/vpx_dsp/arm/highbd_vpx_convolve_copy_neon.c b/vpx_dsp/arm/highbd_vpx_convolve_copy_neon.c

index a980ab1a380c5d8cda586f07bd9adea3023d3d3e..9401e7b8c6948f6e68cf03fc1091fb690df03d49 100644 (file)
--- a/vpx_dsp/arm/highbd_vpx_convolve_copy_neon.c
+++ b/vpx_dsp/arm/highbd_vpx_convolve_copy_neon.c
@@ -18,8 +18,8 @@ void vpx_highbd_convolve_copy_neon(const uint8_t *src8, ptrdiff_t src_stride,
                                     const int16_t *filter_x, int filter_x_stride,
                                     const int16_t *filter_y, int filter_y_stride,
                                     int w, int h, int bd) {
-  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+  const uint16_t *src = CAST_TO_SHORTPTR(src8);
+  uint16_t *dst = CAST_TO_SHORTPTR(dst8);
  
    (void)filter_x;
    (void)filter_x_stride;
diff --git a/vpx_dsp/arm/highbd_vpx_convolve_neon.c b/vpx_dsp/arm/highbd_vpx_convolve_neon.c

index 4e6e109920a337ccbbc90302aa5d5639ebb00f0d..03a36e4a0623e1613a7ec4d5b87c4a0892c5fc8f 100644 (file)
--- a/vpx_dsp/arm/highbd_vpx_convolve_neon.c
+++ b/vpx_dsp/arm/highbd_vpx_convolve_neon.c
@@ -18,7 +18,7 @@ void vpx_highbd_convolve8_neon(const uint8_t *src8, ptrdiff_t src_stride,
                                 const int16_t *filter_x, int x_step_q4,
                                 const int16_t *filter_y, int y_step_q4, int w,
                                 int h, int bd) {
-  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+  const uint16_t *src = CAST_TO_SHORTPTR(src8);
    const int y0_q4 = get_filter_offset(filter_y, get_filter_base(filter_y));
    // + 1 to make it divisible by 4
    DECLARE_ALIGNED(16, uint16_t, temp[64 * 136]);
@@ -29,13 +29,12 @@ void vpx_highbd_convolve8_neon(const uint8_t *src8, ptrdiff_t src_stride,
     * height and filter a multiple of 4 lines. Since this goes in to the temp
     * buffer which has lots of extra room and is subsequently discarded this is
     * safe if somewhat less than ideal.   */
-  vpx_highbd_convolve8_horiz_neon(CONVERT_TO_BYTEPTR(src - src_stride * 3),
-                                  src_stride, CONVERT_TO_BYTEPTR(temp), w,
-                                  filter_x, x_step_q4, filter_y, y_step_q4, w,
-                                  intermediate_height, bd);
+  vpx_highbd_convolve8_horiz_neon(
+      CAST_TO_BYTEPTR(src - src_stride * 3), src_stride, CAST_TO_BYTEPTR(temp),
+      w, filter_x, x_step_q4, filter_y, y_step_q4, w, intermediate_height, bd);
  
    /* Step into the temp buffer 3 lines to get the actual frame data */
-  vpx_highbd_convolve8_vert_neon(CONVERT_TO_BYTEPTR(temp + w * 3), w, dst,
+  vpx_highbd_convolve8_vert_neon(CAST_TO_BYTEPTR(temp + w * 3), w, dst,
                                   dst_stride, filter_x, x_step_q4, filter_y,
                                   y_step_q4, w, h, bd);
  }
@@ -45,7 +44,7 @@ void vpx_highbd_convolve8_avg_neon(const uint8_t *src8, ptrdiff_t src_stride,
                                     const int16_t *filter_x, int x_step_q4,
                                     const int16_t *filter_y, int y_step_q4,
                                     int w, int h, int bd) {
-  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+  const uint16_t *src = CAST_TO_SHORTPTR(src8);
    const int y0_q4 = get_filter_offset(filter_y, get_filter_base(filter_y));
    // + 1 to make it divisible by 4
    DECLARE_ALIGNED(16, uint16_t, temp[64 * 136]);
@@ -55,11 +54,10 @@ void vpx_highbd_convolve8_avg_neon(const uint8_t *src8, ptrdiff_t src_stride,
    /* This implementation has the same issues as above. In addition, we only want
     * to average the values after both passes.
     */
-  vpx_highbd_convolve8_horiz_neon(CONVERT_TO_BYTEPTR(src - src_stride * 3),
-                                  src_stride, CONVERT_TO_BYTEPTR(temp), w,
-                                  filter_x, x_step_q4, filter_y, y_step_q4, w,
-                                  intermediate_height, bd);
-  vpx_highbd_convolve8_avg_vert_neon(CONVERT_TO_BYTEPTR(temp + w * 3), w, dst,
+  vpx_highbd_convolve8_horiz_neon(
+      CAST_TO_BYTEPTR(src - src_stride * 3), src_stride, CAST_TO_BYTEPTR(temp),
+      w, filter_x, x_step_q4, filter_y, y_step_q4, w, intermediate_height, bd);
+  vpx_highbd_convolve8_avg_vert_neon(CAST_TO_BYTEPTR(temp + w * 3), w, dst,
                                       dst_stride, filter_x, x_step_q4, filter_y,
                                       y_step_q4, w, h, bd);
  }
diff --git a/vpx_dsp/vpx_convolve.c b/vpx_dsp/vpx_convolve.c

index cab6368e606d5e8ed3b24ff362a8fd9068307d0d..5a62836eb959f0e8c2cf84b45fcd1b26679b76a9 100644 (file)
--- a/vpx_dsp/vpx_convolve.c
+++ b/vpx_dsp/vpx_convolve.c
@@ -324,8 +324,8 @@ static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
                                    const InterpKernel *x_filters, int x0_q4,
                                    int x_step_q4, int w, int h, int bd) {
    int x, y;
-  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+  const uint16_t *src = CAST_TO_SHORTPTR(src8);
+  uint16_t *dst = CAST_TO_SHORTPTR(dst8);
    src -= SUBPEL_TAPS / 2 - 1;
  
    for (y = 0; y < h; ++y) {
@@ -348,8 +348,8 @@ static void highbd_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,
                                        const InterpKernel *x_filters, int x0_q4,
                                        int x_step_q4, int w, int h, int bd) {
    int x, y;
-  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+  const uint16_t *src = CAST_TO_SHORTPTR(src8);
+  uint16_t *dst = CAST_TO_SHORTPTR(dst8);
    src -= SUBPEL_TAPS / 2 - 1;
  
    for (y = 0; y < h; ++y) {
@@ -374,8 +374,8 @@ static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,
                                   const InterpKernel *y_filters, int y0_q4,
                                   int y_step_q4, int w, int h, int bd) {
    int x, y;
-  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+  const uint16_t *src = CAST_TO_SHORTPTR(src8);
+  uint16_t *dst = CAST_TO_SHORTPTR(dst8);
    src -= src_stride * (SUBPEL_TAPS / 2 - 1);
  
    for (x = 0; x < w; ++x) {
@@ -400,8 +400,8 @@ static void highbd_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,
                                       const InterpKernel *y_filters, int y0_q4,
                                       int y_step_q4, int w, int h, int bd) {
    int x, y;
-  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+  const uint16_t *src = CAST_TO_SHORTPTR(src8);
+  uint16_t *dst = CAST_TO_SHORTPTR(dst8);
    src -= src_stride * (SUBPEL_TAPS / 2 - 1);
  
    for (x = 0; x < w; ++x) {
@@ -449,12 +449,12 @@ static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride,
    assert(y_step_q4 <= 32);
    assert(x_step_q4 <= 32);
  
-  highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,
-                        CONVERT_TO_BYTEPTR(temp), 64, x_filters, x0_q4,
+  highbd_convolve_horiz(CAST_TO_BYTEPTR(CAST_TO_SHORTPTR(src) -
+                                        src_stride * (SUBPEL_TAPS / 2 - 1)),
+                        src_stride, CAST_TO_BYTEPTR(temp), 64, x_filters, x0_q4,
                          x_step_q4, w, intermediate_height, bd);
-  highbd_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1),
-                       64, dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h,
-                       bd);
+  highbd_convolve_vert(CAST_TO_BYTEPTR(temp + 64 * (SUBPEL_TAPS / 2 - 1)), 64,
+                       dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h, bd);
  }
  
  void vpx_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
@@ -541,10 +541,10 @@ void vpx_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
    assert(w <= 64);
    assert(h <= 64);
  
-  vpx_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64,
-                         filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);
-  vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride, NULL,
-                            0, NULL, 0, w, h, bd);
+  vpx_highbd_convolve8_c(src, src_stride, CAST_TO_BYTEPTR(temp), 64, filter_x,
+                         x_step_q4, filter_y, y_step_q4, w, h, bd);
+  vpx_highbd_convolve_avg_c(CAST_TO_BYTEPTR(temp), 64, dst, dst_stride, NULL, 0,
+                            NULL, 0, w, h, bd);
  }
  
  void vpx_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
@@ -553,8 +553,8 @@ void vpx_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
                                  const int16_t *filter_y, int filter_y_stride,
                                  int w, int h, int bd) {
    int r;
-  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+  const uint16_t *src = CAST_TO_SHORTPTR(src8);
+  uint16_t *dst = CAST_TO_SHORTPTR(dst8);
  
    (void)filter_x;
    (void)filter_x_stride;
@@ -575,8 +575,8 @@ void vpx_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,
                                 const int16_t *filter_y, int filter_y_stride,
                                 int w, int h, int bd) {
    int x, y;
-  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+  const uint16_t *src = CAST_TO_SHORTPTR(src8);
+  uint16_t *dst = CAST_TO_SHORTPTR(dst8);
  
    (void)filter_x;
    (void)filter_x_stride;
diff --git a/vpx_dsp/x86/convolve.h b/vpx_dsp/x86/convolve.h

index d7468ad7ca56a912b35655b158f97b77c583b6e3..ea701641615af3d7dbcb181769d1221e46e0a9c5 100644 (file)
--- a/vpx_dsp/x86/convolve.h
+++ b/vpx_dsp/x86/convolve.h
@@ -107,8 +107,8 @@ typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr,
        ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4,       \
        const int16_t *filter_y, int y_step_q4, int w, int h, int bd) {     \
      if (step_q4 == 16 && filter[3] != 128) {                              \
-      uint16_t *src = CONVERT_TO_SHORTPTR(src8);                          \
-      uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);                          \
+      uint16_t *src = CAST_TO_SHORTPTR(src8);                             \
+      uint16_t *dst = CAST_TO_SHORTPTR(dst8);                             \
        if (filter[0] | filter[1] | filter[2]) {                            \
          while (w >= 16) {                                                 \
            vpx_highbd_filter_block1d16_##dir##8_##avg##opt(                \
@@ -162,36 +162,37 @@ typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr,
      }                                                                     \
    }
  
-#define HIGH_FUN_CONV_2D(avg, opt)                                            \
-  void vpx_highbd_convolve8_##avg##opt(                                       \
-      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                 \
-      ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4,           \
-      const int16_t *filter_y, int y_step_q4, int w, int h, int bd) {         \
-    assert(w <= 64);                                                          \
-    assert(h <= 64);                                                          \
-    if (x_step_q4 == 16 && y_step_q4 == 16) {                                 \
-      if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) {  \
-        DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]);                       \
-        vpx_highbd_convolve8_horiz_##opt(                                     \
-            src - 3 * src_stride, src_stride, CONVERT_TO_BYTEPTR(fdata2), 64, \
-            filter_x, x_step_q4, filter_y, y_step_q4, w, h + 7, bd);          \
-        vpx_highbd_convolve8_##avg##vert_##opt(                               \
-            CONVERT_TO_BYTEPTR(fdata2) + 192, 64, dst, dst_stride, filter_x,  \
-            x_step_q4, filter_y, y_step_q4, w, h, bd);                        \
-      } else {                                                                \
-        DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]);                       \
-        vpx_highbd_convolve8_horiz_##opt(                                     \
-            src, src_stride, CONVERT_TO_BYTEPTR(fdata2), 64, filter_x,        \
-            x_step_q4, filter_y, y_step_q4, w, h + 1, bd);                    \
-        vpx_highbd_convolve8_##avg##vert_##opt(                               \
-            CONVERT_TO_BYTEPTR(fdata2), 64, dst, dst_stride, filter_x,        \
-            x_step_q4, filter_y, y_step_q4, w, h, bd);                        \
-      }                                                                       \
-    } else {                                                                  \
-      vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride,         \
-                                    filter_x, x_step_q4, filter_y, y_step_q4, \
-                                    w, h, bd);                                \
-    }                                                                         \
+#define HIGH_FUN_CONV_2D(avg, opt)                                             \
+  void vpx_highbd_convolve8_##avg##opt(                                        \
+      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                  \
+      ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4,            \
+      const int16_t *filter_y, int y_step_q4, int w, int h, int bd) {          \
+    assert(w <= 64);                                                           \
+    assert(h <= 64);                                                           \
+    if (x_step_q4 == 16 && y_step_q4 == 16) {                                  \
+      if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) {   \
+        DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]);                        \
+        vpx_highbd_convolve8_horiz_##opt(                                      \
+            CAST_TO_BYTEPTR(CAST_TO_SHORTPTR(src) - 3 * src_stride),           \
+            src_stride, CAST_TO_BYTEPTR(fdata2), 64, filter_x, x_step_q4,      \
+            filter_y, y_step_q4, w, h + 7, bd);                                \
+        vpx_highbd_convolve8_##avg##vert_##opt(                                \
+            CAST_TO_BYTEPTR(fdata2 + 192), 64, dst, dst_stride, filter_x,      \
+            x_step_q4, filter_y, y_step_q4, w, h, bd);                         \
+      } else {                                                                 \
+        DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]);                        \
+        vpx_highbd_convolve8_horiz_##opt(                                      \
+            src, src_stride, CAST_TO_BYTEPTR(fdata2), 64, filter_x, x_step_q4, \
+            filter_y, y_step_q4, w, h + 1, bd);                                \
+        vpx_highbd_convolve8_##avg##vert_##opt(                                \
+            CAST_TO_BYTEPTR(fdata2), 64, dst, dst_stride, filter_x, x_step_q4, \
+            filter_y, y_step_q4, w, h, bd);                                    \
+      }                                                                        \
+    } else {                                                                   \
+      vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride,          \
+                                    filter_x, x_step_q4, filter_y, y_step_q4,  \
+                                    w, h, bd);                                 \
+    }                                                                          \
    }
  #endif  // CONFIG_VP9_HIGHBITDEPTH
  
diff --git a/vpx_dsp/x86/highbd_convolve_avx2.c b/vpx_dsp/x86/highbd_convolve_avx2.c

index 75589d32a685fb1522d36919d3dd8c71698b4e6b..2b774bf239ff1a3a6dc78c1dc791d05aaf531e4e 100644 (file)
--- a/vpx_dsp/x86/highbd_convolve_avx2.c
+++ b/vpx_dsp/x86/highbd_convolve_avx2.c
@@ -21,8 +21,8 @@ void vpx_highbd_convolve_copy_avx2(const uint8_t *src8, ptrdiff_t src_stride,
                                     const int16_t *filter_x, int filter_x_stride,
                                     const int16_t *filter_y, int filter_y_stride,
                                     int width, int h, int bd) {
-  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+  const uint16_t *src = CAST_TO_SHORTPTR(src8);
+  uint16_t *dst = CAST_TO_SHORTPTR(dst8);
    (void)filter_x;
    (void)filter_y;
    (void)filter_x_stride;
@@ -104,8 +104,8 @@ void vpx_highbd_convolve_avg_avx2(const uint8_t *src8, ptrdiff_t src_stride,
                                    const int16_t *filter_x, int filter_x_stride,
                                    const int16_t *filter_y, int filter_y_stride,
                                    int width, int h, int bd) {
-  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
-  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+  uint16_t *src = CAST_TO_SHORTPTR(src8);
+  uint16_t *dst = CAST_TO_SHORTPTR(dst8);
    (void)filter_x;
    (void)filter_y;
    (void)filter_x_stride;
diff --git a/vpx_dsp/x86/vpx_convolve_copy_sse2.asm b/vpx_dsp/x86/vpx_convolve_copy_sse2.asm

index e2311c116705a0c75e02d7e219e1ecb38915e910..389a692dbc9bd8d56f420d1b3482b16f519491ab 100644 (file)
--- a/vpx_dsp/x86/vpx_convolve_copy_sse2.asm
+++ b/vpx_dsp/x86/vpx_convolve_copy_sse2.asm
@@ -32,9 +32,7 @@ cglobal convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \
    mov r4d, dword wm
  %ifidn %2, highbd
    shl r4d, 1
-  shl srcq, 1
    shl src_strideq, 1
-  shl dstq, 1
    shl dst_strideq, 1
  %else
    cmp r4d, 4
author	Linfeng Zhang <linfengz@google.com>
	Thu, 6 Apr 2017 00:54:42 +0000 (17:54 -0700)
committer	Linfeng Zhang <linfengz@google.com>
	Wed, 19 Apr 2017 19:13:49 +0000 (12:13 -0700)
test/convolve_test.cc		patch \| blob \| history
vp9/common/vp9_reconinter.h		patch \| blob \| history
vp9/encoder/vp9_encoder.c		patch \| blob \| history
vp9/encoder/vp9_pickmode.c		patch \| blob \| history
vp9/encoder/vp9_rdopt.c		patch \| blob \| history
vpx_dsp/arm/highbd_vpx_convolve8_neon.c		patch \| blob \| history
vpx_dsp/arm/highbd_vpx_convolve_avg_neon.c		patch \| blob \| history
vpx_dsp/arm/highbd_vpx_convolve_copy_neon.c		patch \| blob \| history
vpx_dsp/arm/highbd_vpx_convolve_neon.c		patch \| blob \| history
vpx_dsp/vpx_convolve.c		patch \| blob \| history
vpx_dsp/x86/convolve.h		patch \| blob \| history
vpx_dsp/x86/highbd_convolve_avx2.c		patch \| blob \| history
vpx_dsp/x86/vpx_convolve_copy_sse2.asm		patch \| blob \| history