From 50b91aff52579ef7f46b47aef0d657a731a2f84f Mon Sep 17 00:00:00 2001 From: Yunqing Wang Date: Mon, 8 Oct 2018 16:21:54 -0700 Subject: [PATCH] Use 4-tap interp filter in speed 1 sub-pel motion search Added the 4-tap interp filter, and used it for speed 1 sub-pel motion search. Speed 2 motion search still used bilinear filter as before. Speed 1 borg test showed good bit savings. avg_psnr: ovr_psnr: ssim: lowres: -1.125 -1.179 -1.021 midres: -0.717 -0.710 -0.543 hdres: -0.357 -0.370 -0.342 Speed test at speed 1 showed ~10% encoder time increase, which was partially because of no SIMD version of 4-tap filter. Change-Id: Ic9b48cdc6a964538c20144108526682d64348301 --- test/convolve_test.cc | 2 +- vp9/common/vp9_filter.c | 18 ++++++++++++++++-- vp9/common/vp9_filter.h | 3 ++- vp9/encoder/vp9_mcomp.c | 9 ++++++++- vp9/encoder/vp9_speed_features.c | 4 +++- 5 files changed, 30 insertions(+), 6 deletions(-) diff --git a/test/convolve_test.cc b/test/convolve_test.cc index 8f6c5cd48..c4b3922e2 100644 --- a/test/convolve_test.cc +++ b/test/convolve_test.cc @@ -789,7 +789,7 @@ TEST_P(ConvolveTest, Copy2D) { } } -const int kNumFilterBanks = 4; +const int kNumFilterBanks = 5; const int kNumFilters = 16; TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) { diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c index 6c43af8ce..cadae6f2e 100644 --- a/vp9/common/vp9_filter.c +++ b/vp9/common/vp9_filter.c @@ -63,6 +63,20 @@ DECLARE_ALIGNED(256, static const InterpKernel, { 0, -3, 2, 41, 63, 29, -2, -2 }, { 0, -3, 1, 38, 64, 32, -1, -3 } }; -const InterpKernel *vp9_filter_kernels[4] = { - sub_pel_filters_8, sub_pel_filters_8lp, sub_pel_filters_8s, bilinear_filters +// 4-tap filter +DECLARE_ALIGNED(256, static const InterpKernel, + sub_pel_filters_4[SUBPEL_SHIFTS]) = { + { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -3, 125, 8, -2, 0, 0 }, + { 0, 0, -6, 120, 18, -4, 0, 0 }, { 0, 0, -8, 115, 27, -6, 0, 0 }, + { 0, 0, -10, 108, 37, -7, 0, 0 }, { 0, 0, -11, 101, 47, -9, 0, 0 }, + { 0, 0, -11, 93, 56, -10, 0, 0 }, { 0, 0, -12, 85, 66, -11, 0, 0 }, + { 0, 0, -11, 75, 75, -11, 0, 0 }, { 0, 0, -11, 66, 85, -12, 0, 0 }, + { 0, 0, -10, 56, 93, -11, 0, 0 }, { 0, 0, -9, 47, 101, -11, 0, 0 }, + { 0, 0, -7, 37, 108, -10, 0, 0 }, { 0, 0, -6, 27, 115, -8, 0, 0 }, + { 0, 0, -4, 18, 120, -6, 0, 0 }, { 0, 0, -2, 8, 125, -3, 0, 0 } +}; + +const InterpKernel *vp9_filter_kernels[5] = { + sub_pel_filters_8, sub_pel_filters_8lp, sub_pel_filters_8s, bilinear_filters, + sub_pel_filters_4 }; diff --git a/vp9/common/vp9_filter.h b/vp9/common/vp9_filter.h index b379665b1..0382c88e7 100644 --- a/vp9/common/vp9_filter.h +++ b/vp9/common/vp9_filter.h @@ -25,6 +25,7 @@ extern "C" { #define EIGHTTAP_SHARP 2 #define SWITCHABLE_FILTERS 3 /* Number of switchable filters */ #define BILINEAR 3 +#define FOURTAP 4 // The codec can operate in four possible inter prediction filter mode: // 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three. #define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1) @@ -32,7 +33,7 @@ extern "C" { typedef uint8_t INTERP_FILTER; -extern const InterpKernel *vp9_filter_kernels[4]; +extern const InterpKernel *vp9_filter_kernels[5]; #ifdef __cplusplus } // extern "C" diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index ea2619354..0f9051bb7 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -760,7 +760,14 @@ uint32_t vp9_find_best_sub_pixel_tree( unsigned int cost_array[5]; int kr, kc; MvLimits subpel_mv_limits; - const InterpKernel *kernel = vp9_filter_kernels[EIGHTTAP]; + + // TODO(yunqing): need to add 4-tap filter optimization to speed up the + // encoder. + const InterpKernel *kernel = (use_accurate_subpel_search > 0) + ? ((use_accurate_subpel_search == USE_4_TAPS) + ? vp9_filter_kernels[FOURTAP] + : vp9_filter_kernels[EIGHTTAP]) + : vp9_filter_kernels[BILINEAR]; vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); minc = subpel_mv_limits.col_min; diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index fc9e8ec44..87b417a4b 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -288,7 +288,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, sf->exhaustive_searches_thresh = (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 23) : INT_MAX; - sf->use_accurate_subpel_search = USE_2_TAPS; + sf->use_accurate_subpel_search = USE_4_TAPS; } if (speed >= 2) { @@ -330,6 +330,8 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, good_quality_mesh_patterns[mesh_density_level][i].interval; } } + + sf->use_accurate_subpel_search = USE_2_TAPS; } if (speed >= 3) { -- 2.40.0