LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm1d_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_fwd_txfm2d_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm2d_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_convolve_test.cc
TEST_INTRA_PRED_SPEED_SRCS-yes := test_intra_pred_speed.cc
TEST_INTRA_PRED_SPEED_SRCS-yes += ../md5_utils.h ../md5_utils.c
--- /dev/null
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "vp10/common/filter.h"
+#include "vp10/common/vp10_convolve.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+// Checks that vp10_convolve() with the EIGHTTAP filter produces the same
+// single output pixel as vpx_convolve8_c() when both are given identical
+// random source data, sub-pixel offsets and step sizes.
+TEST(VP10ConvolveTest, vp10_convolve8) {
+  // src is dimensioned for filters of at most kMaxTap taps.
+  const int kMaxTap = 12;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const INTERP_FILTER interp_filter = EIGHTTAP;
+  const InterpFilterParams filter_params =
+      vp10_get_interp_filter_params(interp_filter);
+  const int filter_size = filter_params.tap;
+  // Fail cleanly instead of overrunning src if a longer filter is ever used.
+  ASSERT_LE(filter_size, kMaxTap);
+  const int filter_center = filter_size / 2 - 1;
+  uint8_t src[kMaxTap * kMaxTap];
+  const int src_stride = filter_size;
+  uint8_t dst[1] = {0};
+  uint8_t dst1[1] = {0};
+  const int dst_stride = 1;
+  const int x_step_q4 = 16;
+  const int y_step_q4 = 16;
+  const int subpel_x_q4 = 3;
+  const int subpel_y_q4 = 2;
+  const int avg = 0;
+  const int w = 1;
+  const int h = 1;
+
+  for (int i = 0; i < filter_size * filter_size; i++) {
+    src[i] = rnd.Rand16() % (1 << 8);
+  }
+
+  // Both convolutions are centred on the same source pixel so that the
+  // filter window covers exactly the filter_size x filter_size random block.
+  const uint8_t* const src_center =
+      src + src_stride * filter_center + filter_center;
+
+  vp10_convolve(src_center, src_stride, dst, dst_stride, w, h, filter_params,
+                subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg);
+
+  const int16_t* x_filter =
+      vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
+  const int16_t* y_filter =
+      vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
+
+  vpx_convolve8_c(src_center, src_stride, dst1, dst_stride, x_filter,
+                  x_step_q4, y_filter, y_step_q4, w, h);
+  EXPECT_EQ(dst[0], dst1[0]);
+}
+// Checks vp10_convolve() with the EIGHTTAP filter for one output pixel
+// against a reference computed here: each source row is filtered
+// horizontally, rounded and clipped to 8 bits, then the row results are
+// filtered vertically, rounded and clipped again.
+TEST(VP10ConvolveTest, vp10_convolve) {
+  // src is dimensioned for filters of at most kMaxTap taps.
+  const int kMaxTap = 12;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const INTERP_FILTER interp_filter = EIGHTTAP;
+  const InterpFilterParams filter_params =
+      vp10_get_interp_filter_params(interp_filter);
+  const int filter_size = filter_params.tap;
+  // Fail cleanly instead of overrunning src if a longer filter is ever used.
+  ASSERT_LE(filter_size, kMaxTap);
+  const int filter_center = filter_size / 2 - 1;
+  uint8_t src[kMaxTap * kMaxTap];
+  const int src_stride = filter_size;
+  uint8_t dst[1] = {0};
+  const int dst_stride = 1;
+  const int x_step_q4 = 16;
+  const int y_step_q4 = 16;
+  const int subpel_x_q4 = 3;
+  const int subpel_y_q4 = 2;
+  const int avg = 0;
+  const int w = 1;
+  const int h = 1;
+
+  for (int i = 0; i < filter_size * filter_size; i++) {
+    src[i] = rnd.Rand16() % (1 << 8);
+  }
+
+  vp10_convolve(src + src_stride * filter_center + filter_center, src_stride,
+                dst, dst_stride, w, h, filter_params, subpel_x_q4, x_step_q4,
+                subpel_y_q4, y_step_q4, avg);
+
+  const int16_t* x_filter =
+      vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
+  const int16_t* y_filter =
+      vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
+
+  // Reference: horizontal pass per row, then vertical pass over the rows.
+  int dst_ref = 0;
+  for (int r = 0; r < filter_size; r++) {
+    int row_sum = 0;
+    for (int c = 0; c < filter_size; c++) {
+      row_sum += x_filter[c] * src[r * src_stride + c];
+    }
+    const int row_pixel = clip_pixel(ROUND_POWER_OF_TWO(row_sum, FILTER_BITS));
+    dst_ref += row_pixel * y_filter[r];
+  }
+  dst_ref = clip_pixel(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS));
+  EXPECT_EQ(dst[0], dst_ref);
+}
+
+// Checks the averaging mode of vp10_convolve(): convolving src0 into dst
+// with avg == 0 and then src1 into the same dst with avg == 1 must equal the
+// rounded mean of the two independently convolved results.
+TEST(VP10ConvolveTest, vp10_convolve_avg) {
+  // src0/src1 are dimensioned for filters of at most kMaxTap taps.
+  const int kMaxTap = 12;
+  const int kNoAvg = 0;
+  const int kAvg = 1;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const INTERP_FILTER interp_filter = EIGHTTAP;
+  const InterpFilterParams filter_params =
+      vp10_get_interp_filter_params(interp_filter);
+  const int filter_size = filter_params.tap;
+  // Fail cleanly instead of overrunning the buffers if a longer filter is
+  // ever used.
+  ASSERT_LE(filter_size, kMaxTap);
+  const int filter_center = filter_size / 2 - 1;
+  uint8_t src0[kMaxTap * kMaxTap];
+  uint8_t src1[kMaxTap * kMaxTap];
+  const int src_stride = filter_size;
+  uint8_t dst0[1] = {0};
+  uint8_t dst1[1] = {0};
+  uint8_t dst[1] = {0};
+  const int dst_stride = 1;
+  const int x_step_q4 = 16;
+  const int y_step_q4 = 16;
+  const int subpel_x_q4 = 3;
+  const int subpel_y_q4 = 2;
+  const int w = 1;
+  const int h = 1;
+
+  for (int i = 0; i < filter_size * filter_size; i++) {
+    src0[i] = rnd.Rand16() % (1 << 8);
+    src1[i] = rnd.Rand16() % (1 << 8);
+  }
+
+  const int offset = src_stride * filter_center + filter_center;
+
+  // Independent, non-averaged results for each source.
+  vp10_convolve(src0 + offset, src_stride, dst0, dst_stride, w, h,
+                filter_params, subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4,
+                kNoAvg);
+  vp10_convolve(src1 + offset, src_stride, dst1, dst_stride, w, h,
+                filter_params, subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4,
+                kNoAvg);
+
+  // Write src0's result into dst, then average src1's result into it.
+  vp10_convolve(src0 + offset, src_stride, dst, dst_stride, w, h, filter_params,
+                subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, kNoAvg);
+  vp10_convolve(src1 + offset, src_stride, dst, dst_stride, w, h, filter_params,
+                subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, kAvg);
+
+  EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// Checks vp10_highbd_convolve() (bd == 10, EIGHTTAP filter) for one output
+// pixel against a reference computed here: each source row is filtered
+// horizontally, rounded and clipped to bd bits, then the row results are
+// filtered vertically, rounded and clipped again.
+TEST(VP10ConvolveTest, vp10_highbd_convolve) {
+  // src is dimensioned for filters of at most kMaxTap taps.
+  const int kMaxTap = 12;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const INTERP_FILTER interp_filter = EIGHTTAP;
+  const InterpFilterParams filter_params =
+      vp10_get_interp_filter_params(interp_filter);
+  const int filter_size = filter_params.tap;
+  // Fail cleanly instead of overrunning src if a longer filter is ever used.
+  ASSERT_LE(filter_size, kMaxTap);
+  const int filter_center = filter_size / 2 - 1;
+  uint16_t src[kMaxTap * kMaxTap];
+  const int src_stride = filter_size;
+  uint16_t dst[1] = {0};
+  const int dst_stride = 1;
+  const int x_step_q4 = 16;
+  const int y_step_q4 = 16;
+  const int subpel_x_q4 = 8;
+  const int subpel_y_q4 = 6;
+  const int avg = 0;
+  const int bd = 10;
+  const int w = 1;
+  const int h = 1;
+
+  for (int i = 0; i < filter_size * filter_size; i++) {
+    src[i] = rnd.Rand16() % (1 << bd);
+  }
+
+  // Form the pointer first so the result does not depend on how
+  // CONVERT_TO_BYTEPTR parenthesises its argument.
+  uint16_t* const src_center =
+      src + src_stride * filter_center + filter_center;
+
+  vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src_center), src_stride,
+                       CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
+                       filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+                       y_step_q4, avg, bd);
+
+  const int16_t* x_filter =
+      vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);
+  const int16_t* y_filter =
+      vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);
+
+  // Reference: horizontal pass per row, then vertical pass over the rows.
+  int dst_ref = 0;
+  for (int r = 0; r < filter_size; r++) {
+    int row_sum = 0;
+    for (int c = 0; c < filter_size; c++) {
+      row_sum += x_filter[c] * src[r * src_stride + c];
+    }
+    const int row_pixel =
+        clip_pixel_highbd(ROUND_POWER_OF_TWO(row_sum, FILTER_BITS), bd);
+    dst_ref += row_pixel * y_filter[r];
+  }
+  dst_ref = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS), bd);
+  EXPECT_EQ(dst[0], dst_ref);
+}
+
+// Checks the averaging mode of vp10_highbd_convolve() (bd == 10): convolving
+// src0 into dst with avg == 0 and then src1 into the same dst with avg == 1
+// must equal the rounded mean of the two independently convolved results.
+TEST(VP10ConvolveTest, vp10_highbd_convolve_avg) {
+  // src0/src1 are dimensioned for filters of at most kMaxTap taps.
+  const int kMaxTap = 12;
+  const int kNoAvg = 0;
+  const int kAvg = 1;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const INTERP_FILTER interp_filter = EIGHTTAP;
+  const InterpFilterParams filter_params =
+      vp10_get_interp_filter_params(interp_filter);
+  const int filter_size = filter_params.tap;
+  // Fail cleanly instead of overrunning the buffers if a longer filter is
+  // ever used.
+  ASSERT_LE(filter_size, kMaxTap);
+  const int filter_center = filter_size / 2 - 1;
+  uint16_t src0[kMaxTap * kMaxTap];
+  uint16_t src1[kMaxTap * kMaxTap];
+  const int src_stride = filter_size;
+  uint16_t dst0[1] = {0};
+  uint16_t dst1[1] = {0};
+  uint16_t dst[1] = {0};
+  const int dst_stride = 1;
+  const int x_step_q4 = 16;
+  const int y_step_q4 = 16;
+  const int subpel_x_q4 = 3;
+  const int subpel_y_q4 = 2;
+  const int bd = 10;
+  const int w = 1;
+  const int h = 1;
+
+  for (int i = 0; i < filter_size * filter_size; i++) {
+    src0[i] = rnd.Rand16() % (1 << bd);
+    src1[i] = rnd.Rand16() % (1 << bd);
+  }
+
+  const int offset = src_stride * filter_center + filter_center;
+  // Form the pointers first so the result does not depend on how
+  // CONVERT_TO_BYTEPTR parenthesises its argument.
+  uint16_t* const src0_center = src0 + offset;
+  uint16_t* const src1_center = src1 + offset;
+
+  // Independent, non-averaged results for each source.
+  vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src0_center), src_stride,
+                       CONVERT_TO_BYTEPTR(dst0), dst_stride, w, h,
+                       filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+                       y_step_q4, kNoAvg, bd);
+  vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src1_center), src_stride,
+                       CONVERT_TO_BYTEPTR(dst1), dst_stride, w, h,
+                       filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+                       y_step_q4, kNoAvg, bd);
+
+  // Write src0's result into dst, then average src1's result into it.
+  vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src0_center), src_stride,
+                       CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
+                       filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+                       y_step_q4, kNoAvg, bd);
+  vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src1_center), src_stride,
+                       CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
+                       filter_params, subpel_x_q4, x_step_q4, subpel_y_q4,
+                       y_step_q4, kAvg, bd);
+
+  EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+} // namespace
sub_pel_filters_8smooth, // INTRA_FILTER_8TAP_SMOOTH
};
#endif // CONFIG_EXT_INTRA
+
+// Table of filter descriptions, looked up by INTERP_FILTER value in
+// vp10_get_interp_filter_params(). Each entry holds the kernel table viewed
+// as a flat int16_t array, the number of taps per kernel and the number of
+// sub-pixel positions; every entry here uses SUBPEL_TAPS and SUBPEL_SHIFTS.
+// NOTE(review): the entry order presumably has to match the INTERP_FILTER
+// enum, with the bilinear entry last -- confirm against the enum definition.
+static const InterpFilterParams
+vp10_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = {
+ {(const int16_t*)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS},
+ {(const int16_t*)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS},
+ {(const int16_t*)sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS},
+// This entry is only compiled in for the four-switchable-filter EXT_INTERP
+// configuration.
+#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
+ {(const int16_t*)sub_pel_filters_8smooth2, SUBPEL_TAPS, SUBPEL_SHIFTS},
+#endif
+ {(const int16_t*)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS}
+};
+
+// Returns, by value, the vp10_interp_filter_params_list entry at index
+// interp_filter. The index is used as-is, without a range check.
+InterpFilterParams vp10_get_interp_filter_params(
+    const INTERP_FILTER interp_filter) {
+  const InterpFilterParams *const entry =
+      &vp10_interp_filter_params_list[interp_filter];
+  return *entry;
+}
extern const InterpKernel *vp10_intra_filter_kernels[INTRA_FILTERS];
#endif // CONFIG_EXT_INTRA
+// Description of one interpolation filter: where its kernels live and how
+// many coefficients each kernel has.
+typedef struct InterpFilterParams {
+ // Start of the coefficient table. vp10_get_interp_filter_kernel() treats it
+ // as consecutive kernels of `tap` coefficients, one per sub-pixel position.
+ const int16_t* filter_ptr;
+ // Number of coefficients (taps) in each kernel.
+ uint16_t tap;
+ // NOTE(review): presumably the number of sub-pixel positions in the table;
+ // nothing in this change reads the field -- confirm intended use.
+ uint16_t subpel_shifts;
+} InterpFilterParams;
+
+// Returns a copy of the filter description (kernel table pointer, tap count,
+// sub-pixel shift count) associated with interp_filter.
+InterpFilterParams vp10_get_interp_filter_params(
+ const INTERP_FILTER interp_filter);
+// Returns a pointer to the first coefficient of the kernel for sub-pixel
+// position `subpel`; kernels are stored back to back, filter_params.tap
+// coefficients each, starting at filter_params.filter_ptr.
+static INLINE const int16_t* vp10_get_interp_filter_kernel(
+    const InterpFilterParams filter_params, const int subpel) {
+  const int kernel_offset = filter_params.tap * subpel;
+  return &filter_params.filter_ptr[kernel_offset];
+}
#ifdef __cplusplus
} // extern "C"
#endif
#include "vp10/common/filter.h"
#include "vp10/common/onyxc_int.h"
+#include "vp10/common/vp10_convolve.h"
#include "vpx/vpx_integer.h"
#ifdef __cplusplus
int w, int h, int ref,
const INTERP_FILTER interp_filter,
int xs, int ys) {
- const InterpKernel *kernel = vp10_filter_kernels[interp_filter];
+ InterpFilterParams interp_filter_params =
+ vp10_get_interp_filter_params(interp_filter);
+ if (interp_filter_params.tap == SUBPEL_TAPS) {
+ const InterpKernel *kernel = vp10_filter_kernels[interp_filter];
#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
- if (kernel[0][SUBPEL_TAPS / 2 - 1] == 128) {
- // Interpolating filter
+ if (IsInterpolatingFilter(interp_filter)) {
+ // Interpolating filter
+ sf->predict[subpel_x != 0][subpel_y != 0][ref](
+ src, src_stride, dst, dst_stride,
+ kernel[subpel_x], xs, kernel[subpel_y], ys, w, h);
+ } else {
+ sf->predict_ni[subpel_x != 0][subpel_y != 0][ref](
+ src, src_stride, dst, dst_stride,
+ kernel[subpel_x], xs, kernel[subpel_y], ys, w, h);
+ }
+#else
sf->predict[subpel_x != 0][subpel_y != 0][ref](
src, src_stride, dst, dst_stride,
kernel[subpel_x], xs, kernel[subpel_y], ys, w, h);
+#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
} else {
- sf->predict_ni[subpel_x != 0][subpel_y != 0][ref](
- src, src_stride, dst, dst_stride,
- kernel[subpel_x], xs, kernel[subpel_y], ys, w, h);
+ // ref > 0 means this is the second reference frame
+ // first reference frame's prediction result is already in dst
+ // therefore we need to average the first and second results
+ int avg = ref > 0;
+ vp10_convolve(src, src_stride, dst, dst_stride, w, h, interp_filter_params,
+ subpel_x, xs, subpel_y, ys, avg);
}
-#else
- sf->predict[subpel_x != 0][subpel_y != 0][ref](
- src, src_stride, dst, dst_stride,
- kernel[subpel_x], xs, kernel[subpel_y], ys, w, h);
-#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
}
#if CONFIG_VP9_HIGHBITDEPTH
int w, int h, int ref,
const INTERP_FILTER interp_filter,
int xs, int ys, int bd) {
- const InterpKernel *kernel = vp10_filter_kernels[interp_filter];
+ InterpFilterParams interp_filter_params =
+ vp10_get_interp_filter_params(interp_filter);
+ if (interp_filter_params.tap == SUBPEL_TAPS) {
+ const InterpKernel *kernel = vp10_filter_kernels[interp_filter];
#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
- if (kernel[0][SUBPEL_TAPS / 2 - 1] == 128) {
- // Interpolating filter
+ if (IsInterpolatingFilter(interp_filter)) {
+ // Interpolating filter
+ sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
+ src, src_stride, dst, dst_stride,
+ kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
+ } else {
+ sf->highbd_predict_ni[subpel_x != 0][subpel_y != 0][ref](
+ src, src_stride, dst, dst_stride,
+ kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
+ }
+#else
sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
src, src_stride, dst, dst_stride,
kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
+#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
} else {
- sf->highbd_predict_ni[subpel_x != 0][subpel_y != 0][ref](
- src, src_stride, dst, dst_stride,
- kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
+ // ref > 0 means this is the second reference frame
+ // first reference frame's prediction result is already in dst
+ // therefore we need to average the first and second results
+ int avg = ref > 0;
+ vp10_highbd_convolve(src, src_stride, dst, dst_stride, w, h,
+ interp_filter_params, subpel_x, xs, subpel_y, ys, avg,
+ bd);
}
-#else
- sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
- src, src_stride, dst, dst_stride,
- kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
-#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
}
#endif // CONFIG_VP9_HIGHBITDEPTH
--- /dev/null
+#include <assert.h>
+
+#include "vp10/common/filter.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_ports/mem.h"
+
+#define MAX_BLOCK_WIDTH (64)
+#define MAX_BLOCK_HEIGHT (64)
+#define MAX_STEP (32)
+#define MAX_FILTER_TAP (12)
+
+// Horizontal pass: writes a w x h block to dst in which each pixel is the
+// dot product of filter_params.tap consecutive source pixels with the kernel
+// selected by the low SUBPEL_BITS of the running x position, rounded by
+// FILTER_BITS and clipped to 8 bits. The x position starts at subpel_x_q4 on
+// every row and advances by x_step_q4 per output pixel. When avg is nonzero
+// the result is averaged (with rounding) into the value already in dst.
+static void convolve_horiz(const uint8_t *src, int src_stride, uint8_t *dst,
+                           int dst_stride, int w, int h,
+                           const InterpFilterParams filter_params,
+                           const int subpel_x_q4, int x_step_q4, int avg) {
+  const int taps = filter_params.tap;
+  // Shift left so that the filter window is centred on the requested pixel.
+  const uint8_t *src_row = src - (taps / 2 - 1);
+  uint8_t *dst_row = dst;
+  int row;
+
+  for (row = 0; row < h; ++row, src_row += src_stride, dst_row += dst_stride) {
+    int pos_q4 = subpel_x_q4;
+    int col;
+    for (col = 0; col < w; ++col, pos_q4 += x_step_q4) {
+      const uint8_t *const window = src_row + (pos_q4 >> SUBPEL_BITS);
+      const int16_t *const kernel =
+          vp10_get_interp_filter_kernel(filter_params, pos_q4 & SUBPEL_MASK);
+      int acc = 0;
+      int pixel;
+      int k;
+      for (k = 0; k < taps; ++k) acc += window[k] * kernel[k];
+      pixel = clip_pixel(ROUND_POWER_OF_TWO(acc, FILTER_BITS));
+      dst_row[col] =
+          avg ? ROUND_POWER_OF_TWO(dst_row[col] + pixel, 1) : pixel;
+    }
+  }
+}
+
+// Vertical pass: writes a w x h block to dst in which each pixel is the dot
+// product of filter_params.tap vertically adjacent source pixels with the
+// kernel selected by the low SUBPEL_BITS of the running y position, rounded
+// by FILTER_BITS and clipped to 8 bits. The y position starts at subpel_y_q4
+// in every column and advances by y_step_q4 per output row. When avg is
+// nonzero the result is averaged (with rounding) into the value already in
+// dst.
+static void convolve_vert(const uint8_t *src, int src_stride, uint8_t *dst,
+                          int dst_stride, int w, int h,
+                          const InterpFilterParams filter_params,
+                          const int subpel_y_q4, int y_step_q4, int avg) {
+  const int taps = filter_params.tap;
+  // Shift up so that the filter window is centred on the requested pixel.
+  const uint8_t *src_col = src - src_stride * (taps / 2 - 1);
+  uint8_t *dst_col = dst;
+  int col;
+
+  for (col = 0; col < w; ++col, ++src_col, ++dst_col) {
+    int pos_q4 = subpel_y_q4;
+    int row;
+    for (row = 0; row < h; ++row, pos_q4 += y_step_q4) {
+      const uint8_t *const window =
+          src_col + (pos_q4 >> SUBPEL_BITS) * src_stride;
+      const int16_t *const kernel =
+          vp10_get_interp_filter_kernel(filter_params, pos_q4 & SUBPEL_MASK);
+      uint8_t *const out = dst_col + row * dst_stride;
+      int acc = 0;
+      int pixel;
+      int k;
+      for (k = 0; k < taps; ++k) acc += window[k * src_stride] * kernel[k];
+      pixel = clip_pixel(ROUND_POWER_OF_TWO(acc, FILTER_BITS));
+      *out = avg ? ROUND_POWER_OF_TWO(*out + pixel, 1) : pixel;
+    }
+  }
+}
+
+// Two-pass separable convolution of a w x h block. The horizontal pass
+// filters every source row the vertical filter will need into a stack
+// buffer (never averaging); the vertical pass then filters that buffer into
+// dst, averaging with the existing dst contents when avg is nonzero.
+// Block size, step sizes and tap count are bounded by the asserts below.
+void vp10_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
+                   int dst_stride, int w, int h,
+                   const InterpFilterParams filter_params,
+                   const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
+                   int y_step_q4, int avg) {
+  // Sized for the tallest possible intermediate block (maximum height, step
+  // and tap count) at a fixed row pitch of MAX_BLOCK_WIDTH.
+  uint8_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
+                MAX_FILTER_TAP) *
+               MAX_BLOCK_WIDTH];
+  const int temp_stride = MAX_BLOCK_WIDTH;
+  const int taps = filter_params.tap;
+  // Number of rows/columns of context before the centre tap.
+  const int lead = taps / 2 - 1;
+  const int last_row_q4 = (h - 1) * y_step_q4 + subpel_y_q4;
+  const int intermediate_height = (last_row_q4 >> SUBPEL_BITS) + taps;
+
+  assert(w <= MAX_BLOCK_WIDTH);
+  assert(h <= MAX_BLOCK_HEIGHT);
+  assert(y_step_q4 <= MAX_STEP);
+  assert(x_step_q4 <= MAX_STEP);
+  assert(filter_params.tap <= MAX_FILTER_TAP);
+
+  // Start `lead` rows above the block so the vertical pass has its context.
+  convolve_horiz(src - src_stride * lead, src_stride, temp, temp_stride, w,
+                 intermediate_height, filter_params, subpel_x_q4, x_step_q4,
+                 0);
+  convolve_vert(temp + temp_stride * lead, temp_stride, dst, dst_stride, w, h,
+                filter_params, subpel_y_q4, y_step_q4, avg);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// High-bit-depth horizontal pass: same procedure as the 8-bit horizontal
+// pass but on uint16_t samples, with results clipped to bd bits. The x
+// position starts at subpel_x_q4 on every row and advances by x_step_q4 per
+// output pixel. When avg is nonzero the result is averaged (with rounding)
+// into the value already in dst.
+static void highbd_convolve_horiz(const uint16_t *src, int src_stride,
+                                  uint16_t *dst, int dst_stride, int w, int h,
+                                  const InterpFilterParams filter_params,
+                                  const int subpel_x_q4, int x_step_q4, int avg,
+                                  int bd) {
+  const int taps = filter_params.tap;
+  // Shift left so that the filter window is centred on the requested pixel.
+  const uint16_t *src_row = src - (taps / 2 - 1);
+  uint16_t *dst_row = dst;
+  int row;
+
+  for (row = 0; row < h; ++row, src_row += src_stride, dst_row += dst_stride) {
+    int pos_q4 = subpel_x_q4;
+    int col;
+    for (col = 0; col < w; ++col, pos_q4 += x_step_q4) {
+      const uint16_t *const window = src_row + (pos_q4 >> SUBPEL_BITS);
+      const int16_t *const kernel =
+          vp10_get_interp_filter_kernel(filter_params, pos_q4 & SUBPEL_MASK);
+      int acc = 0;
+      int pixel;
+      int k;
+      for (k = 0; k < taps; ++k) acc += window[k] * kernel[k];
+      pixel = clip_pixel_highbd(ROUND_POWER_OF_TWO(acc, FILTER_BITS), bd);
+      dst_row[col] =
+          avg ? ROUND_POWER_OF_TWO(dst_row[col] + pixel, 1) : pixel;
+    }
+  }
+}
+
+// High-bit-depth vertical pass: same procedure as the 8-bit vertical pass
+// but on uint16_t samples, with results clipped to bd bits. The y position
+// starts at subpel_y_q4 in every column and advances by y_step_q4 per output
+// row. When avg is nonzero the result is averaged (with rounding) into the
+// value already in dst.
+static void highbd_convolve_vert(const uint16_t *src, int src_stride,
+                                 uint16_t *dst, int dst_stride, int w, int h,
+                                 const InterpFilterParams filter_params,
+                                 const int subpel_y_q4, int y_step_q4, int avg,
+                                 int bd) {
+  const int taps = filter_params.tap;
+  // Shift up so that the filter window is centred on the requested pixel.
+  const uint16_t *src_col = src - src_stride * (taps / 2 - 1);
+  uint16_t *dst_col = dst;
+  int col;
+
+  for (col = 0; col < w; ++col, ++src_col, ++dst_col) {
+    int pos_q4 = subpel_y_q4;
+    int row;
+    for (row = 0; row < h; ++row, pos_q4 += y_step_q4) {
+      const uint16_t *const window =
+          src_col + (pos_q4 >> SUBPEL_BITS) * src_stride;
+      const int16_t *const kernel =
+          vp10_get_interp_filter_kernel(filter_params, pos_q4 & SUBPEL_MASK);
+      uint16_t *const out = dst_col + row * dst_stride;
+      int acc = 0;
+      int pixel;
+      int k;
+      for (k = 0; k < taps; ++k) acc += window[k * src_stride] * kernel[k];
+      pixel = clip_pixel_highbd(ROUND_POWER_OF_TWO(acc, FILTER_BITS), bd);
+      *out = avg ? ROUND_POWER_OF_TWO(*out + pixel, 1) : pixel;
+    }
+  }
+}
+
+// High-bit-depth two-pass separable convolution of a w x h block. src8 and
+// dst8 are converted with CONVERT_TO_SHORTPTR before use. The horizontal
+// pass filters every source row the vertical filter will need into a stack
+// buffer (never averaging); the vertical pass then filters that buffer into
+// the destination, averaging with its existing contents when avg is
+// nonzero. Block size, step sizes and tap count are bounded by the asserts
+// below.
+void vp10_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
+                          int dst_stride, int w, int h,
+                          const InterpFilterParams filter_params,
+                          const int subpel_x_q4, int x_step_q4,
+                          const int subpel_y_q4, int y_step_q4, int avg,
+                          int bd) {
+  // Sized for the tallest possible intermediate block (maximum height, step
+  // and tap count) at a fixed row pitch of MAX_BLOCK_WIDTH.
+  uint16_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
+                 MAX_FILTER_TAP) *
+                MAX_BLOCK_WIDTH];
+  const int temp_stride = MAX_BLOCK_WIDTH;
+  const int taps = filter_params.tap;
+  // Number of rows/columns of context before the centre tap.
+  const int lead = taps / 2 - 1;
+  const int last_row_q4 = (h - 1) * y_step_q4 + subpel_y_q4;
+  const int intermediate_height = (last_row_q4 >> SUBPEL_BITS) + taps;
+  // Start `lead` rows above the block so the vertical pass has its context.
+  const uint8_t *const src8_top = src8 - src_stride * lead;
+
+  assert(w <= MAX_BLOCK_WIDTH);
+  assert(h <= MAX_BLOCK_HEIGHT);
+  assert(y_step_q4 <= MAX_STEP);
+  assert(x_step_q4 <= MAX_STEP);
+  assert(filter_params.tap <= MAX_FILTER_TAP);
+
+  highbd_convolve_horiz(CONVERT_TO_SHORTPTR(src8_top), src_stride, temp,
+                        temp_stride, w, intermediate_height, filter_params,
+                        subpel_x_q4, x_step_q4, 0, bd);
+  highbd_convolve_vert(temp + temp_stride * lead, temp_stride,
+                       CONVERT_TO_SHORTPTR(dst8), dst_stride, w, h,
+                       filter_params, subpel_y_q4, y_step_q4, avg, bd);
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
--- /dev/null
+#ifndef VP10_COMMON_VP10_CONVOLVE_H_
+#define VP10_COMMON_VP10_CONVOLVE_H_
+#include "vp10/common/filter.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Two-pass (horizontal, then vertical) convolution of a w x h block from src
+// to dst using the kernels described by filter_params.
+//   subpel_x_q4 / subpel_y_q4: starting sub-pixel position in each direction.
+//   x_step_q4 / y_step_q4:     position increment per output pixel / row.
+//   avg: when nonzero, the result is averaged (with rounding) into the
+//        existing contents of dst instead of overwriting them.
+// The parameter names follow the order used by the definition and its
+// callers: sub-pixel position and step for x, then for y.
+void vp10_convolve(const uint8_t *src, int src_stride,
+                   uint8_t *dst, int dst_stride,
+                   int w, int h,
+                   const InterpFilterParams filter_params,
+                   const int subpel_x_q4, int x_step_q4,
+                   const int subpel_y_q4, int y_step_q4,
+                   int avg);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// High-bit-depth counterpart of the 8-bit two-pass convolution. src8 and
+// dst8 are converted with CONVERT_TO_SHORTPTR inside the function, and
+// results are clipped to bd bits.
+//   subpel_x_q4 / subpel_y_q4: starting sub-pixel position in each direction.
+//   x_step_q4 / y_step_q4:     position increment per output pixel / row.
+//   avg: when nonzero, the result is averaged (with rounding) into the
+//        existing destination contents instead of overwriting them.
+// The parameter names follow the order used by the definition and its
+// callers: sub-pixel position and step for x, then for y.
+void vp10_highbd_convolve(const uint8_t *src8, int src_stride,
+                          uint8_t *dst8, int dst_stride,
+                          int w, int h,
+                          const InterpFilterParams filter_params,
+                          const int subpel_x_q4, int x_step_q4,
+                          const int subpel_y_q4, int y_step_q4,
+                          int avg, int bd);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_VP10_CONVOLVE_H_
int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1;
int x_pad = 0, y_pad = 0;
+ InterpFilterParams filter_params =
+ vp10_get_interp_filter_params(interp_filter);
+ int filter_size = filter_params.tap;
+
if (subpel_x ||
#if CONFIG_EXT_INTERP
!i_filter ||
#endif
(sf->x_step_q4 != SUBPEL_SHIFTS)) {
- x0 -= VP9_INTERP_EXTEND - 1;
- x1 += VP9_INTERP_EXTEND;
+ x0 -= filter_size / 2 - 1;
+ x1 += filter_size / 2;
x_pad = 1;
}
!i_filter ||
#endif
(sf->y_step_q4 != SUBPEL_SHIFTS)) {
- y0 -= VP9_INTERP_EXTEND - 1;
- y1 += VP9_INTERP_EXTEND;
+ y0 -= filter_size / 2 - 1;
+ y1 += filter_size / 2;
y_pad = 1;
}
const uint8_t *const buf_ptr1 = ref_frame + y0 * buf_stride + x0;
const int b_w = x1 - x0 + 1;
const int b_h = y1 - y0 + 1;
- const int border_offset = y_pad * 3 * b_w + x_pad * 3;
+ const int border_offset = y_pad * (filter_size / 2 - 1) * b_w +
+ x_pad * (filter_size / 2 - 1);
extend_and_predict(buf_ptr1, buf_stride, x0, y0, b_w, b_h,
frame_width, frame_height, border_offset,
VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d.h
VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d.c
VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d_cfg.h
+VP10_COMMON_SRCS-yes += common/vp10_convolve.c
+VP10_COMMON_SRCS-yes += common/vp10_convolve.h
VP10_COMMON_SRCS-$(CONFIG_ANS) += common/ans.h
VP10_COMMON_SRCS-$(CONFIG_ANS) += common/divide.h
VP10_COMMON_SRCS-$(CONFIG_ANS) += common/divide.c
(((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
#if CONFIG_VP9_HIGHBITDEPTH
-#define CONVERT_TO_SHORTPTR(x) ((uint16_t*)(((uintptr_t)x) << 1))
-#define CONVERT_TO_BYTEPTR(x) ((uint8_t*)(((uintptr_t)x) >> 1))
+#define CONVERT_TO_SHORTPTR(x) ((uint16_t*)(((uintptr_t)(x)) << 1))
+#define CONVERT_TO_BYTEPTR(x) ((uint8_t*)(((uintptr_t)(x)) >> 1))
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // VPX_PORTS_MEM_H_