From d5349112e86c73a153ce8260cdb9291dc2f1e067 Mon Sep 17 00:00:00 2001 From: Angie Chiang Date: Thu, 4 Feb 2016 16:13:18 -0800 Subject: [PATCH] add convolution function with adjustable length Change-Id: I1a5b1e15a188ef11594d0c6ac0dbd42aac59cfca --- test/test.mk | 1 + test/vp10_convolve_test.cc | 250 ++++++++++++++++++++++++++++++++++++ vp10/common/filter.c | 16 +++ vp10/common/filter.h | 12 ++ vp10/common/reconinter.h | 68 ++++++---- vp10/common/vp10_convolve.c | 199 ++++++++++++++++++++++++++++ vp10/common/vp10_convolve.h | 31 +++++ vp10/decoder/decodeframe.c | 15 ++- vp10/vp10_common.mk | 2 + vpx_ports/mem.h | 4 +- 10 files changed, 569 insertions(+), 29 deletions(-) create mode 100644 test/vp10_convolve_test.cc create mode 100644 vp10/common/vp10_convolve.c create mode 100644 vp10/common/vp10_convolve.h diff --git a/test/test.mk b/test/test.mk index adcebad48..d6d08ff9e 100644 --- a/test/test.mk +++ b/test/test.mk @@ -182,6 +182,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_fwd_txfm1d_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm1d_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_fwd_txfm2d_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm2d_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_convolve_test.cc TEST_INTRA_PRED_SPEED_SRCS-yes := test_intra_pred_speed.cc TEST_INTRA_PRED_SPEED_SRCS-yes += ../md5_utils.h ../md5_utils.c diff --git a/test/vp10_convolve_test.cc b/test/vp10_convolve_test.cc new file mode 100644 index 000000000..eea7068e0 --- /dev/null +++ b/test/vp10_convolve_test.cc @@ -0,0 +1,250 @@ +#include "third_party/googletest/src/include/gtest/gtest.h" + +#include "./vpx_dsp_rtcd.h" +#include "test/acm_random.h" +#include "vp10/common/filter.h" +#include "vp10/common/vp10_convolve.h" +#include "vpx_dsp/vpx_dsp_common.h" + +using libvpx_test::ACMRandom; + +namespace { +TEST(VP10ConvolveTest, vp10_convolve8) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + INTERP_FILTER interp_filter = EIGHTTAP; + InterpFilterParams filter_params = 
+ vp10_get_interp_filter_params(interp_filter); + ptrdiff_t filter_size = filter_params.tap; + int filter_center = filter_size / 2 - 1; + uint8_t src[12 * 12]; + int src_stride = filter_size; + uint8_t dst[1] = {0}; + uint8_t dst1[1] = {0}; + int dst_stride = 1; + int x_step_q4 = 16; + int y_step_q4 = 16; + int subpel_x_q4 = 3; + int subpel_y_q4 = 2; + int avg = 0; + + int w = 1; + int h = 1; + + for (int i = 0; i < filter_size * filter_size; i++) { + src[i] = rnd.Rand16() % (1 << 8); + } + + vp10_convolve(src + src_stride * filter_center + filter_center, src_stride, + dst, dst_stride, w, h, filter_params, subpel_x_q4, x_step_q4, + subpel_y_q4, y_step_q4, avg); + + const int16_t* x_filter = + vp10_get_interp_filter_kernel(filter_params, subpel_x_q4); + const int16_t* y_filter = + vp10_get_interp_filter_kernel(filter_params, subpel_y_q4); + + vpx_convolve8_c(src + src_stride * filter_center + filter_center, src_stride, + dst1, dst_stride, x_filter, 16, y_filter, 16, w, h); + EXPECT_EQ(dst[0], dst1[0]); +} +TEST(VP10ConvolveTest, vp10_convolve) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + INTERP_FILTER interp_filter = EIGHTTAP; + InterpFilterParams filter_params = + vp10_get_interp_filter_params(interp_filter); + ptrdiff_t filter_size = filter_params.tap; + int filter_center = filter_size / 2 - 1; + uint8_t src[12 * 12]; + int src_stride = filter_size; + uint8_t dst[1] = {0}; + int dst_stride = 1; + int x_step_q4 = 16; + int y_step_q4 = 16; + int subpel_x_q4 = 3; + int subpel_y_q4 = 2; + int avg = 0; + + int w = 1; + int h = 1; + + for (int i = 0; i < filter_size * filter_size; i++) { + src[i] = rnd.Rand16() % (1 << 8); + } + + vp10_convolve(src + src_stride * filter_center + filter_center, src_stride, + dst, dst_stride, w, h, filter_params, subpel_x_q4, x_step_q4, + subpel_y_q4, y_step_q4, avg); + + const int16_t* x_filter = + vp10_get_interp_filter_kernel(filter_params, subpel_x_q4); + const int16_t* y_filter = + vp10_get_interp_filter_kernel(filter_params, 
subpel_y_q4); + + int temp[12]; + int dst_ref = 0; + for (int r = 0; r < filter_size; r++) { + temp[r] = 0; + for (int c = 0; c < filter_size; c++) { + temp[r] += x_filter[c] * src[r * filter_size + c]; + } + temp[r] = clip_pixel(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS)); + dst_ref += temp[r] * y_filter[r]; + } + dst_ref = clip_pixel(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS)); + EXPECT_EQ(dst[0], dst_ref); +} + +TEST(VP10ConvolveTest, vp10_convolve_avg) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + INTERP_FILTER interp_filter = EIGHTTAP; + InterpFilterParams filter_params = + vp10_get_interp_filter_params(interp_filter); + ptrdiff_t filter_size = filter_params.tap; + int filter_center = filter_size / 2 - 1; + uint8_t src0[12 * 12]; + uint8_t src1[12 * 12]; + int src_stride = filter_size; + uint8_t dst0[1] = {0}; + uint8_t dst1[1] = {0}; + uint8_t dst[1] = {0}; + int dst_stride = 1; + int x_step_q4 = 16; + int y_step_q4 = 16; + int subpel_x_q4 = 3; + int subpel_y_q4 = 2; + int avg = 0; + + int w = 1; + int h = 1; + + for (int i = 0; i < filter_size * filter_size; i++) { + src0[i] = rnd.Rand16() % (1 << 8); + src1[i] = rnd.Rand16() % (1 << 8); + } + + int offset = filter_size * filter_center + filter_center; + + avg = 0; + vp10_convolve(src0 + offset, src_stride, dst0, dst_stride, w, h, + filter_params, subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, + avg); + avg = 0; + vp10_convolve(src1 + offset, src_stride, dst1, dst_stride, w, h, + filter_params, subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, + avg); + + avg = 0; + vp10_convolve(src0 + offset, src_stride, dst, dst_stride, w, h, filter_params, + subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg); + avg = 1; + vp10_convolve(src1 + offset, src_stride, dst, dst_stride, w, h, filter_params, + subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg); + + EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1)); +} + +#if CONFIG_VP9_HIGHBITDEPTH +TEST(VP10ConvolveTest, vp10_highbd_convolve) { + ACMRandom 
rnd(ACMRandom::DeterministicSeed()); + INTERP_FILTER interp_filter = EIGHTTAP; + InterpFilterParams filter_params = + vp10_get_interp_filter_params(interp_filter); + ptrdiff_t filter_size = filter_params.tap; + int filter_center = filter_size / 2 - 1; + uint16_t src[12 * 12]; + int src_stride = filter_size; + uint16_t dst[1] = {0}; + int dst_stride = 1; + int x_step_q4 = 16; + int y_step_q4 = 16; + int subpel_x_q4 = 8; + int subpel_y_q4 = 6; + int avg = 0; + int bd = 10; + + int w = 1; + int h = 1; + + for (int i = 0; i < filter_size * filter_size; i++) { + src[i] = rnd.Rand16() % (1 << bd); + } + + vp10_highbd_convolve( + CONVERT_TO_BYTEPTR(src + src_stride * filter_center + filter_center), + src_stride, CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, filter_params, + subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd); + + const int16_t* x_filter = + vp10_get_interp_filter_kernel(filter_params, subpel_x_q4); + const int16_t* y_filter = + vp10_get_interp_filter_kernel(filter_params, subpel_y_q4); + + int temp[12]; + int dst_ref = 0; + for (int r = 0; r < filter_size; r++) { + temp[r] = 0; + for (int c = 0; c < filter_size; c++) { + temp[r] += x_filter[c] * src[r * filter_size + c]; + } + temp[r] = clip_pixel_highbd(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS), bd); + dst_ref += temp[r] * y_filter[r]; + } + dst_ref = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS), bd); + EXPECT_EQ(dst[0], dst_ref); +} + +TEST(VP10ConvolveTest, vp10_highbd_convolve_avg) { + ACMRandom rnd(ACMRandom::DeterministicSeed()); + INTERP_FILTER interp_filter = EIGHTTAP; + InterpFilterParams filter_params = + vp10_get_interp_filter_params(interp_filter); + ptrdiff_t filter_size = filter_params.tap; + int filter_center = filter_size / 2 - 1; + uint16_t src0[12 * 12]; + uint16_t src1[12 * 12]; + int src_stride = filter_size; + uint16_t dst0[1] = {0}; + uint16_t dst1[1] = {0}; + uint16_t dst[1] = {0}; + int dst_stride = 1; + int x_step_q4 = 16; + int y_step_q4 = 16; + int subpel_x_q4 = 
3; + int subpel_y_q4 = 2; + int avg = 0; + int bd = 10; + + int w = 1; + int h = 1; + + for (int i = 0; i < filter_size * filter_size; i++) { + src0[i] = rnd.Rand16() % (1 << bd); + src1[i] = rnd.Rand16() % (1 << bd); + } + + int offset = filter_size * filter_center + filter_center; + + avg = 0; + vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride, + CONVERT_TO_BYTEPTR(dst0), dst_stride, w, h, + filter_params, subpel_x_q4, x_step_q4, subpel_y_q4, + y_step_q4, avg, bd); + avg = 0; + vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride, + CONVERT_TO_BYTEPTR(dst1), dst_stride, w, h, + filter_params, subpel_x_q4, x_step_q4, subpel_y_q4, + y_step_q4, avg, bd); + + avg = 0; + vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride, + CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, filter_params, + subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd); + avg = 1; + vp10_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride, + CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, filter_params, + subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd); + + EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1)); +} +#endif // CONFIG_VP9_HIGHBITDEPTH +} // namespace diff --git a/vp10/common/filter.c b/vp10/common/filter.c index aaa762881..4176e036a 100644 --- a/vp10/common/filter.c +++ b/vp10/common/filter.c @@ -201,3 +201,19 @@ const InterpKernel *vp10_intra_filter_kernels[INTRA_FILTERS] = { sub_pel_filters_8smooth, // INTRA_FILTER_8TAP_SMOOTH }; #endif // CONFIG_EXT_INTRA + +static const InterpFilterParams +vp10_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = { + {(const int16_t*)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS}, + {(const int16_t*)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS}, + {(const int16_t*)sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS}, +#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4 + {(const int16_t*)sub_pel_filters_8smooth2, SUBPEL_TAPS, SUBPEL_SHIFTS}, +#endif + {(const 
int16_t*)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS} +}; + +InterpFilterParams vp10_get_interp_filter_params( + const INTERP_FILTER interp_filter) { + return vp10_interp_filter_params_list[interp_filter]; +} diff --git a/vp10/common/filter.h b/vp10/common/filter.h index a272db894..afebee0ac 100644 --- a/vp10/common/filter.h +++ b/vp10/common/filter.h @@ -55,6 +55,18 @@ typedef enum { extern const InterpKernel *vp10_intra_filter_kernels[INTRA_FILTERS]; #endif // CONFIG_EXT_INTRA +typedef struct InterpFilterParams { + const int16_t* filter_ptr; + uint16_t tap; + uint16_t subpel_shifts; +} InterpFilterParams; + +InterpFilterParams vp10_get_interp_filter_params( + const INTERP_FILTER interp_filter); +static INLINE const int16_t* vp10_get_interp_filter_kernel( + const InterpFilterParams filter_params, const int subpel) { + return filter_params.filter_ptr + filter_params.tap * subpel; +} #ifdef __cplusplus } // extern "C" #endif diff --git a/vp10/common/reconinter.h b/vp10/common/reconinter.h index d868b2569..3fcdb97fe 100644 --- a/vp10/common/reconinter.h +++ b/vp10/common/reconinter.h @@ -13,6 +13,7 @@ #include "vp10/common/filter.h" #include "vp10/common/onyxc_int.h" +#include "vp10/common/vp10_convolve.h" #include "vpx/vpx_integer.h" #ifdef __cplusplus @@ -27,23 +28,34 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride, int w, int h, int ref, const INTERP_FILTER interp_filter, int xs, int ys) { - const InterpKernel *kernel = vp10_filter_kernels[interp_filter]; + InterpFilterParams interp_filter_params = + vp10_get_interp_filter_params(interp_filter); + if (interp_filter_params.tap == SUBPEL_TAPS) { + const InterpKernel *kernel = vp10_filter_kernels[interp_filter]; #if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS - if (kernel[0][SUBPEL_TAPS / 2 - 1] == 128) { - // Interpolating filter + if (IsInterpolatingFilter(interp_filter)) { + // Interpolating filter + sf->predict[subpel_x != 0][subpel_y != 0][ref]( + src, src_stride, dst, 
dst_stride, + kernel[subpel_x], xs, kernel[subpel_y], ys, w, h); + } else { + sf->predict_ni[subpel_x != 0][subpel_y != 0][ref]( + src, src_stride, dst, dst_stride, + kernel[subpel_x], xs, kernel[subpel_y], ys, w, h); + } +#else sf->predict[subpel_x != 0][subpel_y != 0][ref]( src, src_stride, dst, dst_stride, kernel[subpel_x], xs, kernel[subpel_y], ys, w, h); +#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS } else { - sf->predict_ni[subpel_x != 0][subpel_y != 0][ref]( - src, src_stride, dst, dst_stride, - kernel[subpel_x], xs, kernel[subpel_y], ys, w, h); + // ref > 0 means this is the second reference frame + // first reference frame's prediction result is already in dst + // therefore we need to average the first and second results + int avg = ref > 0; + vp10_convolve(src, src_stride, dst, dst_stride, w, h, interp_filter_params, + subpel_x, xs, subpel_y, ys, avg); } -#else - sf->predict[subpel_x != 0][subpel_y != 0][ref]( - src, src_stride, dst, dst_stride, - kernel[subpel_x], xs, kernel[subpel_y], ys, w, h); -#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS } #if CONFIG_VP9_HIGHBITDEPTH @@ -55,23 +67,35 @@ static INLINE void high_inter_predictor(const uint8_t *src, int src_stride, int w, int h, int ref, const INTERP_FILTER interp_filter, int xs, int ys, int bd) { - const InterpKernel *kernel = vp10_filter_kernels[interp_filter]; + InterpFilterParams interp_filter_params = + vp10_get_interp_filter_params(interp_filter); + if (interp_filter_params.tap == SUBPEL_TAPS) { + const InterpKernel *kernel = vp10_filter_kernels[interp_filter]; #if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS - if (kernel[0][SUBPEL_TAPS / 2 - 1] == 128) { - // Interpolating filter + if (IsInterpolatingFilter(interp_filter)) { + // Interpolating filter + sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref]( + src, src_stride, dst, dst_stride, + kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd); + } else { + sf->highbd_predict_ni[subpel_x != 
0][subpel_y != 0][ref]( + src, src_stride, dst, dst_stride, + kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd); + } +#else sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref]( src, src_stride, dst, dst_stride, kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd); +#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS } else { - sf->highbd_predict_ni[subpel_x != 0][subpel_y != 0][ref]( - src, src_stride, dst, dst_stride, - kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd); + // ref > 0 means this is the second reference frame + // first reference frame's prediction result is already in dst + // therefore we need to average the first and second results + int avg = ref > 0; + vp10_highbd_convolve(src, src_stride, dst, dst_stride, w, h, + interp_filter_params, subpel_x, xs, subpel_y, ys, avg, + bd); } -#else - sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref]( - src, src_stride, dst, dst_stride, - kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd); -#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS } #endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/vp10/common/vp10_convolve.c b/vp10/common/vp10_convolve.c new file mode 100644 index 000000000..e8c0c9268 --- /dev/null +++ b/vp10/common/vp10_convolve.c @@ -0,0 +1,199 @@ +#include <assert.h> + +#include "vp10/common/filter.h" +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_ports/mem.h" + +#define MAX_BLOCK_WIDTH (64) +#define MAX_BLOCK_HEIGHT (64) +#define MAX_STEP (32) +#define MAX_FILTER_TAP (12) + +static void convolve_horiz(const uint8_t *src, int src_stride, uint8_t *dst, + int dst_stride, int w, int h, + const InterpFilterParams filter_params, + const int subpel_x_q4, int x_step_q4, int avg) { + int x, y; + int filter_size = filter_params.tap; + src -= filter_size / 2 - 1; + for (y = 0; y < h; ++y) { + int x_q4 = subpel_x_q4; + for (x = 0; x < w; ++x) { + const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; + const int16_t *x_filter = + vp10_get_interp_filter_kernel(filter_params, 
x_q4 & SUBPEL_MASK); + int k, sum = 0; + for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k]; + if (avg) { + dst[x] = ROUND_POWER_OF_TWO( + dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); + } else { + dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); + } + x_q4 += x_step_q4; + } + src += src_stride; + dst += dst_stride; + } +} + +static void convolve_vert(const uint8_t *src, int src_stride, uint8_t *dst, + int dst_stride, int w, int h, + const InterpFilterParams filter_params, + const int subpel_y_q4, int y_step_q4, int avg) { + int x, y; + int filter_size = filter_params.tap; + src -= src_stride * (filter_size / 2 - 1); + + for (x = 0; x < w; ++x) { + int y_q4 = subpel_y_q4; + for (y = 0; y < h; ++y) { + const uint8_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; + const int16_t *y_filter = + vp10_get_interp_filter_kernel(filter_params, y_q4 & SUBPEL_MASK); + int k, sum = 0; + for (k = 0; k < filter_size; ++k) + sum += src_y[k * src_stride] * y_filter[k]; + if (avg) { + dst[y * dst_stride] = ROUND_POWER_OF_TWO( + dst[y * dst_stride] + + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), + 1); + } else { + dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); + } + y_q4 += y_step_q4; + } + ++src; + ++dst; + } +} + +void vp10_convolve(const uint8_t *src, int src_stride, uint8_t *dst, + int dst_stride, int w, int h, + const InterpFilterParams filter_params, + const int subpel_x_q4, int x_step_q4, const int subpel_y_q4, + int y_step_q4, int avg) { + int filter_size = filter_params.tap; + + // temp's size is set to (maximum possible intermediate_height) * + // MAX_BLOCK_WIDTH + uint8_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) + + MAX_FILTER_TAP) * + MAX_BLOCK_WIDTH]; + int temp_stride = MAX_BLOCK_WIDTH; + + int intermediate_height = + (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size; + + assert(w <= MAX_BLOCK_WIDTH); + assert(h <= MAX_BLOCK_HEIGHT); + 
assert(y_step_q4 <= MAX_STEP); + assert(x_step_q4 <= MAX_STEP); + assert(filter_params.tap <= MAX_FILTER_TAP); + + convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride, temp, + temp_stride, w, intermediate_height, filter_params, + subpel_x_q4, x_step_q4, 0); + convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride, dst, + dst_stride, w, h, filter_params, subpel_y_q4, y_step_q4, avg); +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void highbd_convolve_horiz(const uint16_t *src, int src_stride, + uint16_t *dst, int dst_stride, int w, int h, + const InterpFilterParams filter_params, + const int subpel_x_q4, int x_step_q4, int avg, + int bd) { + int x, y; + int filter_size = filter_params.tap; + src -= filter_size / 2 - 1; + for (y = 0; y < h; ++y) { + int x_q4 = subpel_x_q4; + for (x = 0; x < w; ++x) { + const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; + const int16_t *x_filter = + vp10_get_interp_filter_kernel(filter_params, x_q4 & SUBPEL_MASK); + int k, sum = 0; + for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k]; + if (avg) + dst[x] = ROUND_POWER_OF_TWO( + dst[x] + + clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), + 1); + else + dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); + x_q4 += x_step_q4; + } + src += src_stride; + dst += dst_stride; + } +} + +static void highbd_convolve_vert(const uint16_t *src, int src_stride, + uint16_t *dst, int dst_stride, int w, int h, + const InterpFilterParams filter_params, + const int subpel_y_q4, int y_step_q4, int avg, + int bd) { + int x, y; + int filter_size = filter_params.tap; + src -= src_stride * (filter_size / 2 - 1); + + for (x = 0; x < w; ++x) { + int y_q4 = subpel_y_q4; + for (y = 0; y < h; ++y) { + const uint16_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; + const int16_t *y_filter = + vp10_get_interp_filter_kernel(filter_params, y_q4 & SUBPEL_MASK); + int k, sum = 0; + for (k = 0; k < filter_size; ++k) + sum += src_y[k * 
src_stride] * y_filter[k]; + if (avg) { + dst[y * dst_stride] = ROUND_POWER_OF_TWO( + dst[y * dst_stride] + + clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), + 1); + } else { + dst[y * dst_stride] = + clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); + } + y_q4 += y_step_q4; + } + ++src; + ++dst; + } +} + +void vp10_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8, + int dst_stride, int w, int h, + const InterpFilterParams filter_params, + const int subpel_x_q4, int x_step_q4, + const int subpel_y_q4, int y_step_q4, int avg, + int bd) { + int filter_size = filter_params.tap; + + // temp's size is set to (maximum possible intermediate_height) * + // MAX_BLOCK_WIDTH + uint16_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) + + MAX_FILTER_TAP) * + MAX_BLOCK_WIDTH]; + int temp_stride = MAX_BLOCK_WIDTH; + + int intermediate_height = + (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size; + + assert(w <= MAX_BLOCK_WIDTH); + assert(h <= MAX_BLOCK_HEIGHT); + assert(y_step_q4 <= MAX_STEP); + assert(x_step_q4 <= MAX_STEP); + assert(filter_params.tap <= MAX_FILTER_TAP); + + highbd_convolve_horiz( + CONVERT_TO_SHORTPTR(src8 - src_stride * (filter_size / 2 - 1)), + src_stride, temp, temp_stride, w, intermediate_height, filter_params, + subpel_x_q4, x_step_q4, 0, bd); + highbd_convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride, + CONVERT_TO_SHORTPTR(dst8), dst_stride, w, h, + filter_params, subpel_y_q4, y_step_q4, avg, bd); +} +#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/vp10/common/vp10_convolve.h b/vp10/common/vp10_convolve.h new file mode 100644 index 000000000..a3d6c6554 --- /dev/null +++ b/vp10/common/vp10_convolve.h @@ -0,0 +1,31 @@ +#ifndef VP10_COMMON_VP10_CONVOLVE_H_ +#define VP10_COMMON_VP10_CONVOLVE_H_ +#include "vp10/common/filter.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void vp10_convolve(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + int w, 
int h, + const InterpFilterParams filter_params, + const int subpel_x, + const int subpel_y, + int xstep, int ystep, int avg); + +#if CONFIG_VP9_HIGHBITDEPTH +void vp10_highbd_convolve(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + int w, int h, + const InterpFilterParams filter_params, + const int subpel_x, + const int subpel_y, + int xstep, int ystep, int avg, int bd); +#endif // CONFIG_VP9_HIGHBITDEPTH + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP10_COMMON_VP10_CONVOLVE_H_ diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c index 48b121792..eb4ed59ae 100644 --- a/vp10/decoder/decodeframe.c +++ b/vp10/decoder/decodeframe.c @@ -734,13 +734,17 @@ static void dec_build_inter_predictors(VP10Decoder *const pbi, MACROBLOCKD *xd, int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1; int x_pad = 0, y_pad = 0; + InterpFilterParams filter_params = + vp10_get_interp_filter_params(interp_filter); + int filter_size = filter_params.tap; + if (subpel_x || #if CONFIG_EXT_INTERP !i_filter || #endif (sf->x_step_q4 != SUBPEL_SHIFTS)) { - x0 -= VP9_INTERP_EXTEND - 1; - x1 += VP9_INTERP_EXTEND; + x0 -= filter_size / 2 - 1; + x1 += filter_size / 2; x_pad = 1; } @@ -749,8 +753,8 @@ static void dec_build_inter_predictors(VP10Decoder *const pbi, MACROBLOCKD *xd, !i_filter || #endif (sf->y_step_q4 != SUBPEL_SHIFTS)) { - y0 -= VP9_INTERP_EXTEND - 1; - y1 += VP9_INTERP_EXTEND; + y0 -= filter_size / 2 - 1; + y1 += filter_size / 2; y_pad = 1; } @@ -767,7 +771,8 @@ static void dec_build_inter_predictors(VP10Decoder *const pbi, MACROBLOCKD *xd, const uint8_t *const buf_ptr1 = ref_frame + y0 * buf_stride + x0; const int b_w = x1 - x0 + 1; const int b_h = y1 - y0 + 1; - const int border_offset = y_pad * 3 * b_w + x_pad * 3; + const int border_offset = y_pad * (filter_size / 2 - 1) * b_w + + x_pad * (filter_size / 2 - 1); extend_and_predict(buf_ptr1, buf_stride, x0, y0, b_w, b_h, frame_width, frame_height, border_offset, diff --git 
a/vp10/vp10_common.mk b/vp10/vp10_common.mk index fab97eae3..4e89e5ef0 100644 --- a/vp10/vp10_common.mk +++ b/vp10/vp10_common.mk @@ -74,6 +74,8 @@ VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm2d_cfg.h VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d.h VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d.c VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d_cfg.h +VP10_COMMON_SRCS-yes += common/vp10_convolve.c +VP10_COMMON_SRCS-yes += common/vp10_convolve.h VP10_COMMON_SRCS-$(CONFIG_ANS) += common/ans.h VP10_COMMON_SRCS-$(CONFIG_ANS) += common/divide.h VP10_COMMON_SRCS-$(CONFIG_ANS) += common/divide.c diff --git a/vpx_ports/mem.h b/vpx_ports/mem.h index 7502f9063..ec7c91b86 100644 --- a/vpx_ports/mem.h +++ b/vpx_ports/mem.h @@ -46,8 +46,8 @@ (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1)) #if CONFIG_VP9_HIGHBITDEPTH -#define CONVERT_TO_SHORTPTR(x) ((uint16_t*)(((uintptr_t)x) << 1)) -#define CONVERT_TO_BYTEPTR(x) ((uint8_t*)(((uintptr_t)x) >> 1)) +#define CONVERT_TO_SHORTPTR(x) ((uint16_t*)(((uintptr_t)(x)) << 1)) +#define CONVERT_TO_BYTEPTR(x) ((uint8_t*)(((uintptr_t)(x)) >> 1)) #endif // CONFIG_VP9_HIGHBITDEPTH #endif // VPX_PORTS_MEM_H_ -- 2.40.0