Replace CONVERT_TO_BYTEPTR/SHORTPTR with CAST_TO_BYTEPTR/SHORTPTR.
The rule is: if a short ptr is cast to a byte ptr, any offset
operation on the byte ptr must be doubled. We achieve this by casting
to a short ptr first, adding the offset, then casting back to a byte
ptr.
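
For illustration, a minimal C sketch of the rule (assuming the
plain-cast definitions of CAST_TO_SHORTPTR/CAST_TO_BYTEPTR in
vpx_dsp/vpx_dsp_common.h; offset_byteptr, buf16 and offset are
hypothetical names, not part of this change):

  #include <stdint.h>

  #define CAST_TO_SHORTPTR(x) ((uint16_t *)(x))
  #define CAST_TO_BYTEPTR(x) ((uint8_t *)(x))

  static uint8_t *offset_byteptr(uint16_t *buf16, int offset) {
    uint8_t *p8 = CAST_TO_BYTEPTR(buf16);
    /* Wrong: p8 + offset advances offset bytes, i.e. only offset / 2
     * uint16_t pixels. */
    /* Right: add the offset in the short domain, then cast back;
     * equivalent to p8 + 2 * offset. */
    return CAST_TO_BYTEPTR(CAST_TO_SHORTPTR(p8) + offset);
  }

Unlike the old CONVERT_TO_* macros, which encoded a shifted address
(hence the dropped "shl srcq"/"shl dstq" in the x86 assembly below),
the CAST_TO_* macros are plain pointer casts, so the byte and short
views share the same address.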
BUG=webm:1388
Change-Id: I9e18a73ba45ddae58fc9dae470c0ff34951fe248
filter_average_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
dst_stride, output_width, output_height);
} else {
- highbd_filter_average_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
+ highbd_filter_average_block2d_8_c(CAST_TO_SHORTPTR(src_ptr), src_stride,
hfilter, vfilter,
- CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
+ CAST_TO_SHORTPTR(dst_ptr), dst_stride,
output_width, output_height, use_highbd);
}
#else
filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
dst_stride, output_width, output_height);
} else {
- highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride, hfilter,
- vfilter, CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
+ highbd_filter_block2d_8_c(CAST_TO_SHORTPTR(src_ptr), src_stride, hfilter,
+ vfilter, CAST_TO_SHORTPTR(dst_ptr), dst_stride,
output_width, output_height, use_highbd);
}
#else
if (UUT_->use_highbd_ == 0) {
return input_ + offset;
} else {
- return CONVERT_TO_BYTEPTR(input16_) + offset;
+ return CAST_TO_BYTEPTR(input16_ + offset);
}
#else
return input_ + offset;
if (UUT_->use_highbd_ == 0) {
return output_ + offset;
} else {
- return CONVERT_TO_BYTEPTR(output16_) + offset;
+ return CAST_TO_BYTEPTR(output16_ + offset);
}
#else
return output_ + offset;
if (UUT_->use_highbd_ == 0) {
return output_ref_ + offset;
} else {
- return CONVERT_TO_BYTEPTR(output16_ref_) + offset;
+ return CAST_TO_BYTEPTR(output16_ref_ + offset);
}
#else
return output_ref_ + offset;
if (UUT_->use_highbd_ == 0) {
return list[index];
} else {
- return CONVERT_TO_SHORTPTR(list)[index];
+ return CAST_TO_SHORTPTR(list)[index];
}
#else
return list[index];
if (UUT_->use_highbd_ == 0) {
list[index] = (uint8_t)val;
} else {
- CONVERT_TO_SHORTPTR(list)[index] = val;
+ CAST_TO_SHORTPTR(list)[index] = val;
}
#else
list[index] = (uint8_t)val;
if (UUT_->use_highbd_ == 0) {
ref = ref8;
} else {
- ref = CONVERT_TO_BYTEPTR(ref16);
+ ref = CAST_TO_BYTEPTR(ref16);
}
#else
uint8_t ref[kOutputStride * kMaxDimension];
if (UUT_->use_highbd_ == 0) {
ref = ref8;
} else {
- ref = CONVERT_TO_BYTEPTR(ref16);
+ ref = CAST_TO_BYTEPTR(ref16);
}
#else
uint8_t ref[kOutputStride * kMaxDimension];
const int subpel_x, const int subpel_y, const struct scale_factors *sf,
int w, int h, int ref, const InterpKernel *kernel, int xs, int ys, int bd) {
sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
- src, src_stride, dst, dst_stride, kernel[subpel_x], xs, kernel[subpel_y],
- ys, w, h, bd);
+ CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(src)), src_stride,
+ CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst)), dst_stride, kernel[subpel_x],
+ xs, kernel[subpel_y], ys, w, h, bd);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor);
if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
- vpx_highbd_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
- kernel[x_q4 & 0xf], 16 * src_w / dst_w,
- kernel[y_q4 & 0xf], 16 * src_h / dst_h,
- 16 / factor, 16 / factor, bd);
+ vpx_highbd_convolve8(
+ CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(src_ptr)), src_stride,
+ CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst_ptr)), dst_stride,
+ kernel[x_q4 & 0xf], 16 * src_w / dst_w, kernel[y_q4 & 0xf],
+ 16 * src_h / dst_h, 16 / factor, 16 / factor, bd);
} else {
vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride,
kernel[x_q4 & 0xf], 16 * src_w / dst_w,
this_mode_pred = &tmp[get_pred_buffer(tmp, 3)];
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth)
- vpx_highbd_convolve_copy(best_pred->data, best_pred->stride,
- this_mode_pred->data, this_mode_pred->stride,
- NULL, 0, NULL, 0, bw, bh, xd->bd);
+ vpx_highbd_convolve_copy(
+ CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(best_pred->data)),
+ best_pred->stride,
+ CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(this_mode_pred->data)),
+ this_mode_pred->stride, NULL, 0, NULL, 0, bw, bh, xd->bd);
else
vpx_convolve_copy(best_pred->data, best_pred->stride,
this_mode_pred->data, this_mode_pred->stride, NULL,
if (best_pred->data != orig_dst.buf && is_inter_mode(mi->mode)) {
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth)
- vpx_highbd_convolve_copy(best_pred->data, best_pred->stride,
- pd->dst.buf, pd->dst.stride, NULL, 0, NULL, 0,
- bw, bh, xd->bd);
+ vpx_highbd_convolve_copy(
+ CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(best_pred->data)),
+ best_pred->stride,
+ CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(pd->dst.buf)), pd->dst.stride,
+ NULL, 0, NULL, 0, bw, bh, xd->bd);
else
vpx_convolve_copy(best_pred->data, best_pred->stride, pd->dst.buf,
pd->dst.stride, NULL, 0, NULL, 0, bw, bh);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- recon = CONVERT_TO_BYTEPTR(recon);
- vpx_highbd_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, NULL, 0,
- bs, bs, xd->bd);
+ vpx_highbd_convolve_copy(CAST_TO_BYTEPTR(CONVERT_TO_SHORTPTR(dst)),
+ dst_stride, recon, 32, NULL, 0, NULL, 0, bs,
+ bs, xd->bd);
+ recon = CONVERT_TO_BYTEPTR(recon16);
if (xd->lossless) {
vp9_highbd_iwht4x4_add(dqcoeff, recon, 32, *eob, xd->bd);
} else {
vpx_highbd_convolve8_horiz_c(src8, src_stride, dst8, dst_stride, filter_x,
x_step_q4, filter_y, y_step_q4, w, h, bd);
} else {
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const uint16_t *src = CAST_TO_SHORTPTR(src8);
+ uint16_t *dst = CAST_TO_SHORTPTR(dst8);
const int16x8_t filters = vld1q_s16(filter_x);
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
uint16x8_t t0, t1, t2, t3;
filter_x, x_step_q4, filter_y, y_step_q4,
w, h, bd);
} else {
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const uint16_t *src = CAST_TO_SHORTPTR(src8);
+ uint16_t *dst = CAST_TO_SHORTPTR(dst8);
const int16x8_t filters = vld1q_s16(filter_x);
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
uint16x8_t t0, t1, t2, t3;
vpx_highbd_convolve8_vert_c(src8, src_stride, dst8, dst_stride, filter_x,
x_step_q4, filter_y, y_step_q4, w, h, bd);
} else {
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const uint16_t *src = CAST_TO_SHORTPTR(src8);
+ uint16_t *dst = CAST_TO_SHORTPTR(dst8);
const int16x8_t filters = vld1q_s16(filter_y);
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
filter_x, x_step_q4, filter_y, y_step_q4, w,
h, bd);
} else {
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const uint16_t *src = CAST_TO_SHORTPTR(src8);
+ uint16_t *dst = CAST_TO_SHORTPTR(dst8);
const int16x8_t filters = vld1q_s16(filter_y);
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
const int16_t *filter_x, int filter_x_stride,
const int16_t *filter_y, int filter_y_stride,
int w, int h, int bd) {
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const uint16_t *src = CAST_TO_SHORTPTR(src8);
+ uint16_t *dst = CAST_TO_SHORTPTR(dst8);
(void)filter_x;
(void)filter_x_stride;
const int16_t *filter_x, int filter_x_stride,
const int16_t *filter_y, int filter_y_stride,
int w, int h, int bd) {
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const uint16_t *src = CAST_TO_SHORTPTR(src8);
+ uint16_t *dst = CAST_TO_SHORTPTR(dst8);
(void)filter_x;
(void)filter_x_stride;
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4, int w,
int h, int bd) {
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ const uint16_t *src = CAST_TO_SHORTPTR(src8);
const int y0_q4 = get_filter_offset(filter_y, get_filter_base(filter_y));
// + 1 to make it divisible by 4
DECLARE_ALIGNED(16, uint16_t, temp[64 * 136]);
* height and filter a multiple of 4 lines. Since this goes in to the temp
* buffer which has lots of extra room and is subsequently discarded this is
* safe if somewhat less than ideal. */
- vpx_highbd_convolve8_horiz_neon(CONVERT_TO_BYTEPTR(src - src_stride * 3),
- src_stride, CONVERT_TO_BYTEPTR(temp), w,
- filter_x, x_step_q4, filter_y, y_step_q4, w,
- intermediate_height, bd);
+ vpx_highbd_convolve8_horiz_neon(
+ CAST_TO_BYTEPTR(src - src_stride * 3), src_stride, CAST_TO_BYTEPTR(temp),
+ w, filter_x, x_step_q4, filter_y, y_step_q4, w, intermediate_height, bd);
/* Step into the temp buffer 3 lines to get the actual frame data */
- vpx_highbd_convolve8_vert_neon(CONVERT_TO_BYTEPTR(temp + w * 3), w, dst,
+ vpx_highbd_convolve8_vert_neon(CAST_TO_BYTEPTR(temp + w * 3), w, dst,
dst_stride, filter_x, x_step_q4, filter_y,
y_step_q4, w, h, bd);
}
const int16_t *filter_x, int x_step_q4,
const int16_t *filter_y, int y_step_q4,
int w, int h, int bd) {
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ const uint16_t *src = CAST_TO_SHORTPTR(src8);
const int y0_q4 = get_filter_offset(filter_y, get_filter_base(filter_y));
// + 1 to make it divisible by 4
DECLARE_ALIGNED(16, uint16_t, temp[64 * 136]);
/* This implementation has the same issues as above. In addition, we only want
* to average the values after both passes.
*/
- vpx_highbd_convolve8_horiz_neon(CONVERT_TO_BYTEPTR(src - src_stride * 3),
- src_stride, CONVERT_TO_BYTEPTR(temp), w,
- filter_x, x_step_q4, filter_y, y_step_q4, w,
- intermediate_height, bd);
- vpx_highbd_convolve8_avg_vert_neon(CONVERT_TO_BYTEPTR(temp + w * 3), w, dst,
+ vpx_highbd_convolve8_horiz_neon(
+ CAST_TO_BYTEPTR(src - src_stride * 3), src_stride, CAST_TO_BYTEPTR(temp),
+ w, filter_x, x_step_q4, filter_y, y_step_q4, w, intermediate_height, bd);
+ vpx_highbd_convolve8_avg_vert_neon(CAST_TO_BYTEPTR(temp + w * 3), w, dst,
dst_stride, filter_x, x_step_q4, filter_y,
y_step_q4, w, h, bd);
}
const InterpKernel *x_filters, int x0_q4,
int x_step_q4, int w, int h, int bd) {
int x, y;
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const uint16_t *src = CAST_TO_SHORTPTR(src8);
+ uint16_t *dst = CAST_TO_SHORTPTR(dst8);
src -= SUBPEL_TAPS / 2 - 1;
for (y = 0; y < h; ++y) {
const InterpKernel *x_filters, int x0_q4,
int x_step_q4, int w, int h, int bd) {
int x, y;
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const uint16_t *src = CAST_TO_SHORTPTR(src8);
+ uint16_t *dst = CAST_TO_SHORTPTR(dst8);
src -= SUBPEL_TAPS / 2 - 1;
for (y = 0; y < h; ++y) {
const InterpKernel *y_filters, int y0_q4,
int y_step_q4, int w, int h, int bd) {
int x, y;
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const uint16_t *src = CAST_TO_SHORTPTR(src8);
+ uint16_t *dst = CAST_TO_SHORTPTR(dst8);
src -= src_stride * (SUBPEL_TAPS / 2 - 1);
for (x = 0; x < w; ++x) {
const InterpKernel *y_filters, int y0_q4,
int y_step_q4, int w, int h, int bd) {
int x, y;
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const uint16_t *src = CAST_TO_SHORTPTR(src8);
+ uint16_t *dst = CAST_TO_SHORTPTR(dst8);
src -= src_stride * (SUBPEL_TAPS / 2 - 1);
for (x = 0; x < w; ++x) {
assert(y_step_q4 <= 32);
assert(x_step_q4 <= 32);
- highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,
- CONVERT_TO_BYTEPTR(temp), 64, x_filters, x0_q4,
+ highbd_convolve_horiz(CAST_TO_BYTEPTR(CAST_TO_SHORTPTR(src) -
+ src_stride * (SUBPEL_TAPS / 2 - 1)),
+ src_stride, CAST_TO_BYTEPTR(temp), 64, x_filters, x0_q4,
x_step_q4, w, intermediate_height, bd);
- highbd_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1),
- 64, dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h,
- bd);
+ highbd_convolve_vert(CAST_TO_BYTEPTR(temp + 64 * (SUBPEL_TAPS / 2 - 1)), 64,
+ dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h, bd);
}
void vpx_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
assert(w <= 64);
assert(h <= 64);
- vpx_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64,
- filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);
- vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride, NULL,
- 0, NULL, 0, w, h, bd);
+ vpx_highbd_convolve8_c(src, src_stride, CAST_TO_BYTEPTR(temp), 64, filter_x,
+ x_step_q4, filter_y, y_step_q4, w, h, bd);
+ vpx_highbd_convolve_avg_c(CAST_TO_BYTEPTR(temp), 64, dst, dst_stride, NULL, 0,
+ NULL, 0, w, h, bd);
}
void vpx_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
const int16_t *filter_y, int filter_y_stride,
int w, int h, int bd) {
int r;
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const uint16_t *src = CAST_TO_SHORTPTR(src8);
+ uint16_t *dst = CAST_TO_SHORTPTR(dst8);
(void)filter_x;
(void)filter_x_stride;
const int16_t *filter_y, int filter_y_stride,
int w, int h, int bd) {
int x, y;
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const uint16_t *src = CAST_TO_SHORTPTR(src8);
+ uint16_t *dst = CAST_TO_SHORTPTR(dst8);
(void)filter_x;
(void)filter_x_stride;
ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \
const int16_t *filter_y, int y_step_q4, int w, int h, int bd) { \
if (step_q4 == 16 && filter[3] != 128) { \
- uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
+ uint16_t *src = CAST_TO_SHORTPTR(src8); \
+ uint16_t *dst = CAST_TO_SHORTPTR(dst8); \
if (filter[0] | filter[1] | filter[2]) { \
while (w >= 16) { \
vpx_highbd_filter_block1d16_##dir##8_##avg##opt( \
} \
}
-#define HIGH_FUN_CONV_2D(avg, opt) \
- void vpx_highbd_convolve8_##avg##opt( \
- const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
- ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \
- const int16_t *filter_y, int y_step_q4, int w, int h, int bd) { \
- assert(w <= 64); \
- assert(h <= 64); \
- if (x_step_q4 == 16 && y_step_q4 == 16) { \
- if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) { \
- DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \
- vpx_highbd_convolve8_horiz_##opt( \
- src - 3 * src_stride, src_stride, CONVERT_TO_BYTEPTR(fdata2), 64, \
- filter_x, x_step_q4, filter_y, y_step_q4, w, h + 7, bd); \
- vpx_highbd_convolve8_##avg##vert_##opt( \
- CONVERT_TO_BYTEPTR(fdata2) + 192, 64, dst, dst_stride, filter_x, \
- x_step_q4, filter_y, y_step_q4, w, h, bd); \
- } else { \
- DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \
- vpx_highbd_convolve8_horiz_##opt( \
- src, src_stride, CONVERT_TO_BYTEPTR(fdata2), 64, filter_x, \
- x_step_q4, filter_y, y_step_q4, w, h + 1, bd); \
- vpx_highbd_convolve8_##avg##vert_##opt( \
- CONVERT_TO_BYTEPTR(fdata2), 64, dst, dst_stride, filter_x, \
- x_step_q4, filter_y, y_step_q4, w, h, bd); \
- } \
- } else { \
- vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
- filter_x, x_step_q4, filter_y, y_step_q4, \
- w, h, bd); \
- } \
+#define HIGH_FUN_CONV_2D(avg, opt) \
+ void vpx_highbd_convolve8_##avg##opt( \
+ const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
+ ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \
+ const int16_t *filter_y, int y_step_q4, int w, int h, int bd) { \
+ assert(w <= 64); \
+ assert(h <= 64); \
+ if (x_step_q4 == 16 && y_step_q4 == 16) { \
+ if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) { \
+ DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \
+ vpx_highbd_convolve8_horiz_##opt( \
+ CAST_TO_BYTEPTR(CAST_TO_SHORTPTR(src) - 3 * src_stride), \
+ src_stride, CAST_TO_BYTEPTR(fdata2), 64, filter_x, x_step_q4, \
+ filter_y, y_step_q4, w, h + 7, bd); \
+ vpx_highbd_convolve8_##avg##vert_##opt( \
+ CAST_TO_BYTEPTR(fdata2 + 192), 64, dst, dst_stride, filter_x, \
+ x_step_q4, filter_y, y_step_q4, w, h, bd); \
+ } else { \
+ DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \
+ vpx_highbd_convolve8_horiz_##opt( \
+ src, src_stride, CAST_TO_BYTEPTR(fdata2), 64, filter_x, x_step_q4, \
+ filter_y, y_step_q4, w, h + 1, bd); \
+ vpx_highbd_convolve8_##avg##vert_##opt( \
+ CAST_TO_BYTEPTR(fdata2), 64, dst, dst_stride, filter_x, x_step_q4, \
+ filter_y, y_step_q4, w, h, bd); \
+ } \
+ } else { \
+ vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
+ filter_x, x_step_q4, filter_y, y_step_q4, \
+ w, h, bd); \
+ } \
}
#endif // CONFIG_VP9_HIGHBITDEPTH
const int16_t *filter_x, int filter_x_stride,
const int16_t *filter_y, int filter_y_stride,
int width, int h, int bd) {
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const uint16_t *src = CAST_TO_SHORTPTR(src8);
+ uint16_t *dst = CAST_TO_SHORTPTR(dst8);
(void)filter_x;
(void)filter_y;
(void)filter_x_stride;
const int16_t *filter_x, int filter_x_stride,
const int16_t *filter_y, int filter_y_stride,
int width, int h, int bd) {
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ uint16_t *src = CAST_TO_SHORTPTR(src8);
+ uint16_t *dst = CAST_TO_SHORTPTR(dst8);
(void)filter_x;
(void)filter_y;
(void)filter_x_stride;
mov r4d, dword wm
%ifidn %2, highbd
shl r4d, 1
- shl srcq, 1
shl src_strideq, 1
- shl dstq, 1
shl dst_strideq, 1
%else
cmp r4d, 4