From: Debargha Mukherjee Date: Tue, 9 Feb 2016 20:24:33 +0000 (-0800) Subject: Adding loop wiener restoration X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8b0a5b8718831f4d52af6333cc7b98c0618bfe5c;p=libvpx Adding loop wiener restoration Adds a Wiener-filter-based in-loop restoration scheme which can optionally be selected instead of the bilateral filter. The LMMSE filter generated per frame is a separable, symmetric 7-tap filter. Three parameters for each of the horizontal and vertical filters are transmitted in the bitstream. The fourth parameter is obtained by assuming the sum is normalized to 1. Also integerizes the bilateral filters, along with other refactoring necessary to support the new switchable restoration type framework. derflr: -0.75% BDRATE [A lot of videos still prefer bilateral; however, since many frames now use the simpler separable filter, the decoding speed is much better]. Further experiments to follow, related to replacing the bilateral filter. Change-Id: I6b1879983d50aab7ec5647340b6aef6b22299636 --- diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c index 4d9f773d6..77f537e0d 100644 --- a/vp10/common/entropymode.c +++ b/vp10/common/entropymode.c @@ -1568,8 +1568,7 @@ void vp10_setup_past_independence(VP10_COMMON *cm) { // To force update of the sharpness lf->last_sharpness_level = -1; #if CONFIG_LOOP_RESTORATION - lf->restoration_level = 0; - lf->last_restoration_level = 0; + cm->rst_info.restoration_level = -1; #endif // CONFIG_LOOP_RESTORATION vp10_default_coef_probs(cm); diff --git a/vp10/common/loopfilter.h b/vp10/common/loopfilter.h index 5a16baa25..81f44de7c 100644 --- a/vp10/common/loopfilter.h +++ b/vp10/common/loopfilter.h @@ -52,11 +52,6 @@ struct loopfilter { // 0 = ZERO_MV, MV signed char mode_deltas[MAX_MODE_LF_DELTAS]; signed char last_mode_deltas[MAX_MODE_LF_DELTAS]; - -#if CONFIG_LOOP_RESTORATION - int restoration_level; - int last_restoration_level; -#endif // CONFIG_LOOP_RESTORATION }; // Need 
to align this structure so when it is declared and diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h index b6051fd81..02d7e1761 100644 --- a/vp10/common/onyxc_int.h +++ b/vp10/common/onyxc_int.h @@ -258,7 +258,8 @@ typedef struct VP10Common { loop_filter_info_n lf_info; #if CONFIG_LOOP_RESTORATION - restoration_info_n rst_info; + RestorationInfo rst_info; + RestorationInternal rst_internal; #endif // CONFIG_LOOP_RESTORATION // Flag signaling how frame contexts should be updated at the end of diff --git a/vp10/common/restoration.c b/vp10/common/restoration.c index c73a2f9a1..71abd7c73 100644 --- a/vp10/common/restoration.c +++ b/vp10/common/restoration.c @@ -18,64 +18,129 @@ #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" -#define RESTORATION_RANGE 256 -#define RESTORATION_RANGE_SYM (2 * RESTORATION_RANGE + 1) -static double restoration_filters_r_kf[RESTORATION_LEVELS_KF + 1] +#define RESTORATION_PARAM_PRECISION 16 +#define RESTORATION_RANGE 256 +#define RESTORATION_RANGE_SYM (2 * RESTORATION_RANGE + 1) + +static uint8_t restoration_filters_r_kf[RESTORATION_LEVELS_KF] + [RESTORATION_RANGE_SYM]; +static uint8_t restoration_filters_r[RESTORATION_LEVELS] [RESTORATION_RANGE_SYM]; -static double restoration_filters_r[RESTORATION_LEVELS + 1] - [RESTORATION_RANGE_SYM]; -static double restoration_filters_s_kf[RESTORATION_LEVELS_KF + 1] - [RESTORATION_WIN][RESTORATION_WIN]; -static double restoration_filters_s[RESTORATION_LEVELS + 1] - [RESTORATION_WIN][RESTORATION_WIN]; +static uint8_t restoration_filters_s_kf[RESTORATION_LEVELS_KF] + [RESTORATION_WIN][RESTORATION_WIN]; +static uint8_t restoration_filters_s[RESTORATION_LEVELS] + [RESTORATION_WIN][RESTORATION_WIN]; + +typedef struct restoration_params { + int sigma_x; // spatial variance x + int sigma_y; // spatial variance y + int sigma_r; // range variance +} RestorationParamsType; + +static RestorationParamsType + restoration_level_to_params_arr[RESTORATION_LEVELS] = { + // Values are rounded to 1/16 
th precision + {8, 9, 30}, + {9, 8, 30}, + {9, 11, 32}, + {11, 9, 32}, + {14, 14, 32}, + {18, 18, 36}, + {24, 24, 40}, + {32, 32, 40}, +}; + +static RestorationParamsType + restoration_level_to_params_arr_kf[RESTORATION_LEVELS_KF] = { + // Values are rounded to 1/16 th precision + {8, 8, 30}, + {9, 9, 32}, + {10, 10, 32}, + {12, 12, 32}, + {14, 14, 32}, + {18, 18, 36}, + {24, 24, 40}, + {30, 30, 44}, + {36, 36, 48}, + {42, 42, 48}, + {48, 48, 48}, + {48, 48, 56}, + {56, 56, 48}, + {56, 56, 56}, + {56, 56, 64}, + {64, 64, 48}, +}; + +typedef void (*restore_func_type)( + uint8_t *data8, int width, int height, + int stride, RestorationInternal *rst, + uint8_t *tmpdata8, int tmpstride); + +#if CONFIG_VP9_HIGHBITDEPTH +typedef void (*restore_func_highbd_type)( + uint8_t *data8, int width, int height, + int stride, RestorationInternal *rst, + uint8_t *tmpdata8, int tmpstride, + int bit_depth); +#endif // CONFIG_VP9_HIGHBITDEPTH + +static INLINE RestorationParamsType vp10_restoration_level_to_params( + int index, int kf) { + return kf ? 
restoration_level_to_params_arr_kf[index] : + restoration_level_to_params_arr[index]; +} void vp10_loop_restoration_precal() { int i; - for (i = 1; i < RESTORATION_LEVELS_KF + 1; i ++) { - const restoration_params_t param = vp10_restoration_level_to_params(i, 1); + for (i = 0; i < RESTORATION_LEVELS_KF; i ++) { + const RestorationParamsType param = vp10_restoration_level_to_params(i, 1); const int sigma_x = param.sigma_x; const int sigma_y = param.sigma_y; const int sigma_r = param.sigma_r; - const double sigma_r_d = (double)sigma_r / RESTORATION_PRECISION; - const double sigma_x_d = (double)sigma_x / RESTORATION_PRECISION; - const double sigma_y_d = (double)sigma_y / RESTORATION_PRECISION; + const double sigma_r_d = (double)sigma_r / RESTORATION_PARAM_PRECISION; + const double sigma_x_d = (double)sigma_x / RESTORATION_PARAM_PRECISION; + const double sigma_y_d = (double)sigma_y / RESTORATION_PARAM_PRECISION; - double *fr = restoration_filters_r_kf[i] + RESTORATION_RANGE; + uint8_t *fr = restoration_filters_r_kf[i] + RESTORATION_RANGE; int j, x, y; for (j = 0; j <= RESTORATION_RANGE; j++) { - fr[j] = exp(-(j * j) / (2 * sigma_r_d * sigma_r_d)); + fr[j] = (uint8_t)(0.5 + RESTORATION_FILT_STEP * + exp(-(j * j) / (2 * sigma_r_d * sigma_r_d))); fr[-j] = fr[j]; } for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; y++) { for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; x++) { restoration_filters_s_kf[i][y + RESTORATION_HALFWIN] - [x + RESTORATION_HALFWIN] = - exp(-(x * x) / (2 * sigma_x_d * sigma_x_d) - -(y * y) / (2 * sigma_y_d * sigma_y_d)); + [x + RESTORATION_HALFWIN] = + (uint8_t)(0.5 + RESTORATION_FILT_STEP * + exp(-(x * x) / (2 * sigma_x_d * sigma_x_d) + -(y * y) / (2 * sigma_y_d * sigma_y_d))); } } } - for (i = 1; i < RESTORATION_LEVELS + 1; i ++) { - const restoration_params_t param = vp10_restoration_level_to_params(i, 0); + for (i = 0; i < RESTORATION_LEVELS; i ++) { + const RestorationParamsType param = vp10_restoration_level_to_params(i, 0); 
const int sigma_x = param.sigma_x; const int sigma_y = param.sigma_y; const int sigma_r = param.sigma_r; - const double sigma_r_d = (double)sigma_r / RESTORATION_PRECISION; - const double sigma_x_d = (double)sigma_x / RESTORATION_PRECISION; - const double sigma_y_d = (double)sigma_y / RESTORATION_PRECISION; + const double sigma_r_d = (double)sigma_r / RESTORATION_PARAM_PRECISION; + const double sigma_x_d = (double)sigma_x / RESTORATION_PARAM_PRECISION; + const double sigma_y_d = (double)sigma_y / RESTORATION_PARAM_PRECISION; - double *fr = restoration_filters_r[i] + RESTORATION_RANGE; + uint8_t *fr = restoration_filters_r[i] + RESTORATION_RANGE; int j, x, y; for (j = 0; j <= RESTORATION_RANGE; j++) { - fr[j] = exp(-(j * j) / (2 * sigma_r_d * sigma_r_d)); + fr[j] = (uint8_t)(0.5 + RESTORATION_FILT_STEP * + exp(-(j * j) / (2 * sigma_r_d * sigma_r_d))); fr[-j] = fr[j]; } for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; y++) { for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; x++) { - restoration_filters_s - [i][y + RESTORATION_HALFWIN][x + RESTORATION_HALFWIN] = - exp(-(x * x) / (2 * sigma_x_d * sigma_x_d) - -(y * y) / (2 * sigma_y_d * sigma_y_d)); + restoration_filters_s[i][y + RESTORATION_HALFWIN] + [x + RESTORATION_HALFWIN] = + (uint8_t)(0.5 + RESTORATION_FILT_STEP * + exp(-(x * x) / (2 * sigma_x_d * sigma_x_d) + -(y * y) / (2 * sigma_y_d * sigma_y_d))); } } } @@ -86,178 +151,229 @@ int vp10_restoration_level_bits(const VP10_COMMON *const cm) { RESTORATION_LEVEL_BITS_KF : RESTORATION_LEVEL_BITS; } -int vp10_loop_restoration_used(int level, int kf) { - const restoration_params_t param = - vp10_restoration_level_to_params(level, kf); - return (param.sigma_x && param.sigma_y && param.sigma_r); -} - -void vp10_loop_restoration_init(restoration_info_n *rst, - int level, int kf) { - rst->restoration_used = vp10_loop_restoration_used(level, kf); - - if (rst->restoration_used) { - int i; +void vp10_loop_restoration_init(RestorationInternal *rst, + 
RestorationInfo *rsi, int kf) { + int i; + rst->restoration_type = rsi->restoration_type; + if (rsi->restoration_type == RESTORE_BILATERAL) { + const int level = rsi->restoration_level; + assert(level >= 0); rst->wr_lut = kf ? restoration_filters_r_kf[level] : restoration_filters_r[level]; for (i = 0; i < RESTORATION_WIN; i++) rst->wx_lut[i] = kf ? restoration_filters_s_kf[level][i] : restoration_filters_s[level][i]; + } else if (rsi->restoration_type == RESTORE_WIENER) { + rst->vfilter[RESTORATION_HALFWIN] = rst->hfilter[RESTORATION_HALFWIN] = + RESTORATION_FILT_STEP; + for (i = 0; i < RESTORATION_HALFWIN; ++i) { + rst->vfilter[i] = rst->vfilter[RESTORATION_WIN - 1 - i] = rsi->vfilter[i]; + rst->hfilter[i] = rst->hfilter[RESTORATION_WIN - 1 - i] = rsi->hfilter[i]; + rst->vfilter[RESTORATION_HALFWIN] -= 2 * rsi->vfilter[i]; + rst->hfilter[RESTORATION_HALFWIN] -= 2 * rsi->hfilter[i]; + } } } -static int is_in_image(int x, int y, int width, int height) { - return (x >= 0 && x < width && y >= 0 && y < height); -} - -static void loop_restoration_filter(uint8_t *data, int width, int height, - int stride, restoration_info_n *rst, - uint8_t *tmpdata, int tmpstride) { +static void loop_bilateral_filter(uint8_t *data, int width, int height, + int stride, RestorationInternal *rst, + uint8_t *tmpdata, int tmpstride) { int i, j; - const double *wr_lut_ = rst->wr_lut + RESTORATION_RANGE; + const uint8_t *wr_lut_ = rst->wr_lut + RESTORATION_RANGE; - uint8_t *data_p = data; - uint8_t *tmpdata_p = tmpdata; - for (i = 0; i < height; ++i) { - for (j = 0; j < width; ++j) { + uint8_t *data_p = data + RESTORATION_HALFWIN * stride; + uint8_t *tmpdata_p = tmpdata + RESTORATION_HALFWIN * tmpstride; + for (i = RESTORATION_HALFWIN; i < height - RESTORATION_HALFWIN; ++i) { + for (j = RESTORATION_HALFWIN; j < width - RESTORATION_HALFWIN; ++j) { int x, y; - double flsum = 0, wtsum = 0, wt; + int flsum = 0, wtsum = 0, wt; uint8_t *data_p2 = data_p + j - RESTORATION_HALFWIN * stride; for (y = 
-RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; ++y) { for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; ++x) { - if (!is_in_image(j + x, i + y, width, height)) - continue; - wt = rst->wx_lut[y + RESTORATION_HALFWIN][x + RESTORATION_HALFWIN] * - wr_lut_[data_p2[x] - data_p[j]]; + wt = (int)rst->wx_lut[y + RESTORATION_HALFWIN] + [x + RESTORATION_HALFWIN] * + (int)wr_lut_[data_p2[x] - data_p[j]]; wtsum += wt; flsum += wt * data_p2[x]; } data_p2 += stride; } - assert(wtsum > 0); - tmpdata_p[j] = clip_pixel((int)(flsum / wtsum + 0.5)); + if (wtsum > 0) + tmpdata_p[j] = clip_pixel((int)((flsum + wtsum / 2) / wtsum)); + else + tmpdata_p[j] = data_p[j]; } tmpdata_p += tmpstride; data_p += stride; } - for (i = 0; i < height; ++i) { - memcpy(data + i * stride, tmpdata + i * tmpstride, - width * sizeof(*data)); + for (i = RESTORATION_HALFWIN; i < height - RESTORATION_HALFWIN; ++i) { + memcpy(data + i * stride + RESTORATION_HALFWIN, + tmpdata + i * tmpstride + RESTORATION_HALFWIN, + (width - RESTORATION_HALFWIN * 2) * sizeof(*data)); } } -#if 0 // TODO(yaowu): remove when the experiment is finalized -// Normalized non-separable filter where weights all sum to 1 -static void loop_restoration_filter_norm(uint8_t *data, int width, int height, - int stride, restoration_info_n *rst, - uint8_t *tmpdata, int tmpstride) { - int i, j; + +uint8_t hor_sym_filter(uint8_t *d, int *hfilter) { + int32_t s = (1 << (RESTORATION_FILT_BITS - 1)) + + d[0] * hfilter[RESTORATION_HALFWIN]; + int i; + for (i = 1; i <= RESTORATION_HALFWIN; ++i) + s += (d[i] + d[-i]) * hfilter[RESTORATION_HALFWIN + i]; + return clip_pixel(s >> RESTORATION_FILT_BITS); +} + +uint8_t ver_sym_filter(uint8_t *d, int stride, int *vfilter) { + int32_t s = (1 << (RESTORATION_FILT_BITS - 1)) + + d[0] * vfilter[RESTORATION_HALFWIN]; + int i; + for (i = 1; i <= RESTORATION_HALFWIN; ++i) + s += (d[i * stride] + d[-i * stride]) * vfilter[RESTORATION_HALFWIN + i]; + return clip_pixel(s >> RESTORATION_FILT_BITS); +} + +static 
void loop_wiener_filter(uint8_t *data, int width, int height, + int stride, RestorationInternal *rst, + uint8_t *tmpdata, int tmpstride) { uint8_t *data_p = data; uint8_t *tmpdata_p = tmpdata; - for (i = RESTORATION_HALFWIN; i < height - RESTORATION_HALFWIN; ++i) { + int i, j; + + for (i = 0; i < height; ++i) { + memcpy(tmpdata_p, data_p, sizeof(*data_p) * RESTORATION_HALFWIN); + data_p += RESTORATION_HALFWIN; + tmpdata_p += RESTORATION_HALFWIN; for (j = RESTORATION_HALFWIN; j < width - RESTORATION_HALFWIN; ++j) { - int x, y; - double flsum = 0; - uint8_t *data_p2 = data_p + j - RESTORATION_HALFWIN * stride; - for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; ++y) { - for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; ++x) { - flsum += data_p2[x] * - rst->wx_lut[y + RESTORATION_HALFWIN][x + RESTORATION_HALFWIN]; - } - data_p2 += stride; - } - tmpdata_p[j] = clip_pixel((int)(flsum + 0.5)); + *tmpdata_p++ = hor_sym_filter(data_p++, rst->hfilter); } - tmpdata_p += tmpstride; + memcpy(tmpdata_p, data_p, sizeof(*data_p) * RESTORATION_HALFWIN); + data_p += RESTORATION_HALFWIN - width + stride; + tmpdata_p += RESTORATION_HALFWIN - width + tmpstride; + } + data_p = data; + tmpdata_p = tmpdata; + for (i = 0; i < RESTORATION_HALFWIN; ++i) { + memcpy(data_p, tmpdata_p, sizeof(*data_p) * width); data_p += stride; + tmpdata_p += tmpstride; } - for (i = 0; i < height; ++i) { - memcpy(data + i * stride, tmpdata + i * tmpstride, - width * sizeof(*data)); + for (; i < height - RESTORATION_HALFWIN; ++i) { + for (j = 0; j < width; ++j) + *data_p++ = ver_sym_filter(tmpdata_p++, tmpstride, rst->vfilter); + data_p += stride - width; + tmpdata_p += tmpstride - width; + } + for (; i < height; ++i) { + memcpy(data_p, tmpdata_p, sizeof(*data_p) * width); + data_p += stride; + tmpdata_p += tmpstride; } } -#endif #if CONFIG_VP9_HIGHBITDEPTH -static void loop_restoration_filter_highbd( +static void loop_bilateral_filter_highbd( uint8_t *data8, int width, int height, - int stride, 
restoration_info_n *rst, + int stride, RestorationInternal *rst, uint8_t *tmpdata8, int tmpstride, int bit_depth) { int i, j; - const double *wr_lut_ = rst->wr_lut + RESTORATION_RANGE; + const uint8_t *wr_lut_ = rst->wr_lut + RESTORATION_RANGE; uint16_t *data = CONVERT_TO_SHORTPTR(data8); uint16_t *tmpdata = CONVERT_TO_SHORTPTR(tmpdata8); - uint16_t *data_p = data; - uint16_t *tmpdata_p = tmpdata; - for (i = 0; i < height; ++i) { - for (j = 0; j < width; ++j) { + uint16_t *data_p = data + RESTORATION_HALFWIN * stride; + uint16_t *tmpdata_p = tmpdata + RESTORATION_HALFWIN * tmpstride; + for (i = RESTORATION_HALFWIN; i < height - RESTORATION_HALFWIN; ++i) { + for (j = RESTORATION_HALFWIN; j < width - RESTORATION_HALFWIN; ++j) { int x, y, diff_r; - double flsum = 0, wtsum = 0, wt; + int flsum = 0, wtsum = 0, wt; uint16_t *data_p2 = data_p + j - RESTORATION_HALFWIN * stride; - for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; ++y) { for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; ++x) { - if (!is_in_image(j + x, i + y, width, height)) - continue; - diff_r = (data_p2[x] - data_p[j]) >> (bit_depth - 8); assert(diff_r >= -RESTORATION_RANGE && diff_r <= RESTORATION_RANGE); - - wt = rst->wx_lut[y + RESTORATION_HALFWIN][x + RESTORATION_HALFWIN] * - wr_lut_[diff_r]; + wt = (int)rst->wx_lut[y + RESTORATION_HALFWIN] + [x + RESTORATION_HALFWIN] * + (int)wr_lut_[diff_r]; wtsum += wt; flsum += wt * data_p2[x]; } data_p2 += stride; } - - assert(wtsum > 0); - tmpdata_p[j] = (int)(flsum / wtsum + 0.5); + if (wtsum > 0) + tmpdata_p[j] = clip_pixel_highbd((int)((flsum + wtsum / 2) / wtsum), + bit_depth); + else + tmpdata_p[j] = data_p[j]; } tmpdata_p += tmpstride; data_p += stride; } - for (i = 0; i < height; ++i) { - memcpy(data + i * stride, tmpdata + i * tmpstride, - width * sizeof(*data)); + + for (i = RESTORATION_HALFWIN; i < height - RESTORATION_HALFWIN; ++i) { + memcpy(data + i * stride + RESTORATION_HALFWIN, + tmpdata + i * tmpstride + RESTORATION_HALFWIN, + 
(width - RESTORATION_HALFWIN * 2) * sizeof(*data)); } } -#if 0 // TODO(yaowu): remove when the experiment is finalized -// Normalized non-separable filter where weights all sum to 1 -static void loop_restoration_filter_norm_highbd( - uint8_t *data8, int width, int height, - int stride, restoration_info_n *rst, - uint8_t *tmpdata8, int tmpstride) { - int i, j; +uint16_t hor_sym_filter_highbd(uint16_t *d, int *hfilter, int bd) { + int32_t s = (1 << (RESTORATION_FILT_BITS - 1)) + + d[0] * hfilter[RESTORATION_HALFWIN]; + int i; + for (i = 1; i <= RESTORATION_HALFWIN; ++i) + s += (d[i] + d[-i]) * hfilter[RESTORATION_HALFWIN + i]; + return clip_pixel_highbd(s >> RESTORATION_FILT_BITS, bd); +} + +uint16_t ver_sym_filter_highbd(uint16_t *d, int stride, int *vfilter, int bd) { + int32_t s = (1 << (RESTORATION_FILT_BITS - 1)) + + d[0] * vfilter[RESTORATION_HALFWIN]; + int i; + for (i = 1; i <= RESTORATION_HALFWIN; ++i) + s += (d[i * stride] + d[-i * stride]) * vfilter[RESTORATION_HALFWIN + i]; + return clip_pixel_highbd(s >> RESTORATION_FILT_BITS, bd); +} + +static void loop_wiener_filter_highbd(uint8_t *data8, int width, int height, + int stride, RestorationInternal *rst, + uint8_t *tmpdata8, int tmpstride, + int bit_depth) { uint16_t *data = CONVERT_TO_SHORTPTR(data8); uint16_t *tmpdata = CONVERT_TO_SHORTPTR(tmpdata8); uint16_t *data_p = data; uint16_t *tmpdata_p = tmpdata; - for (i = RESTORATION_HALFWIN; i < height - RESTORATION_HALFWIN; ++i) { + int i, j; + for (i = 0; i < height; ++i) { + memcpy(tmpdata_p, data_p, sizeof(*data_p) * RESTORATION_HALFWIN); + data_p += RESTORATION_HALFWIN; + tmpdata_p += RESTORATION_HALFWIN; for (j = RESTORATION_HALFWIN; j < width - RESTORATION_HALFWIN; ++j) { - int x, y; - double flsum = 0; - uint16_t *data_p2 = data_p + j - RESTORATION_HALFWIN * stride; - for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; ++y) { - for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; ++x) { - flsum += data_p2[x] * - rst->wx_lut[y + 
RESTORATION_HALFWIN][x + RESTORATION_HALFWIN]; - } - data_p2 += stride; - } - tmpdata_p[j] = (int)(flsum + 0.5); + *tmpdata_p++ = hor_sym_filter_highbd(data_p++, rst->hfilter, bit_depth); } - tmpdata_p += tmpstride; + memcpy(tmpdata_p, data_p, sizeof(*data_p) * RESTORATION_HALFWIN); + data_p += RESTORATION_HALFWIN - width + stride; + tmpdata_p += RESTORATION_HALFWIN - width + tmpstride; + } + data_p = data; + tmpdata_p = tmpdata; + for (i = 0; i < RESTORATION_HALFWIN; ++i) { + memcpy(data_p, tmpdata_p, sizeof(*data_p) * width); data_p += stride; + tmpdata_p += tmpstride; } - for (i = 0; i < height; ++i) { - memcpy(data + i * stride, tmpdata + i * tmpstride, - width * sizeof(*data)); + for (; i < height - RESTORATION_HALFWIN; ++i) { + for (j = 0; j < width; ++j) + *data_p++ = ver_sym_filter_highbd( + tmpdata_p++, tmpstride, rst->vfilter, bit_depth); + data_p += stride - width; + tmpdata_p += tmpstride - width; + } + for (; i < height; ++i) { + memcpy(data_p, tmpdata_p, sizeof(*data_p) * width); + data_p += stride; + tmpdata_p += tmpstride; } } -#endif #endif // CONFIG_VP9_HIGHBITDEPTH void vp10_loop_restoration_rows(YV12_BUFFER_CONFIG *frame, @@ -272,7 +388,16 @@ void vp10_loop_restoration_rows(YV12_BUFFER_CONFIG *frame, const int uvstart = ystart >> cm->subsampling_y; int yend = end_mi_row << MI_SIZE_LOG2; int uvend = yend >> cm->subsampling_y; + restore_func_type restore_func = + cm->rst_internal.restoration_type == RESTORE_BILATERAL ? + loop_bilateral_filter : loop_wiener_filter; +#if CONFIG_VP9_HIGHBITDEPTH + restore_func_highbd_type restore_func_highbd = + cm->rst_internal.restoration_type == RESTORE_BILATERAL ? + loop_bilateral_filter_highbd : loop_wiener_filter_highbd; +#endif // CONFIG_VP9_HIGHBITDEPTH YV12_BUFFER_CONFIG *tmp_buf; + yend = VPXMIN(yend, cm->height); uvend = VPXMIN(uvend, cm->subsampling_y ? 
(cm->height + 1) >> 1 : cm->height); @@ -290,41 +415,41 @@ void vp10_loop_restoration_rows(YV12_BUFFER_CONFIG *frame, #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) - loop_restoration_filter_highbd( + restore_func_highbd( frame->y_buffer + ystart * ystride, - ywidth, yend - ystart, ystride, &cm->rst_info, + ywidth, yend - ystart, ystride, &cm->rst_internal, tmp_buf->y_buffer + ystart * tmp_buf->y_stride, tmp_buf->y_stride, cm->bit_depth); else #endif // CONFIG_VP9_HIGHBITDEPTH - loop_restoration_filter( - frame->y_buffer + ystart * ystride, - ywidth, yend - ystart, ystride, &cm->rst_info, - tmp_buf->y_buffer + ystart * tmp_buf->y_stride, - tmp_buf->y_stride); + restore_func( + frame->y_buffer + ystart * ystride, + ywidth, yend - ystart, ystride, &cm->rst_internal, + tmp_buf->y_buffer + ystart * tmp_buf->y_stride, + tmp_buf->y_stride); if (!y_only) { #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { - loop_restoration_filter_highbd( + restore_func_highbd( frame->u_buffer + uvstart * uvstride, - uvwidth, uvend - uvstart, uvstride, &cm->rst_info, + uvwidth, uvend - uvstart, uvstride, &cm->rst_internal, tmp_buf->u_buffer + uvstart * tmp_buf->uv_stride, tmp_buf->uv_stride, cm->bit_depth); - loop_restoration_filter_highbd( + restore_func_highbd( frame->v_buffer + uvstart * uvstride, - uvwidth, uvend - uvstart, uvstride, &cm->rst_info, + uvwidth, uvend - uvstart, uvstride, &cm->rst_internal, tmp_buf->v_buffer + uvstart * tmp_buf->uv_stride, tmp_buf->uv_stride, cm->bit_depth); } else { #endif // CONFIG_VP9_HIGHBITDEPTH - loop_restoration_filter( + restore_func( frame->u_buffer + uvstart * uvstride, - uvwidth, uvend - uvstart, uvstride, &cm->rst_info, + uvwidth, uvend - uvstart, uvstride, &cm->rst_internal, tmp_buf->u_buffer + uvstart * tmp_buf->uv_stride, tmp_buf->uv_stride); - loop_restoration_filter( + restore_func( frame->v_buffer + uvstart * uvstride, - uvwidth, uvend - uvstart, uvstride, &cm->rst_info, + uvwidth, uvend - uvstart, uvstride, 
&cm->rst_internal, tmp_buf->v_buffer + uvstart * tmp_buf->uv_stride, tmp_buf->uv_stride); #if CONFIG_VP9_HIGHBITDEPTH @@ -334,21 +459,21 @@ void vp10_loop_restoration_rows(YV12_BUFFER_CONFIG *frame, } void vp10_loop_restoration_frame(YV12_BUFFER_CONFIG *frame, - VP10_COMMON *cm, - int restoration_level, - int y_only, int partial_frame) { + VP10_COMMON *cm, + RestorationInfo *rsi, + int y_only, int partial_frame) { int start_mi_row, end_mi_row, mi_rows_to_filter; - vp10_loop_restoration_init(&cm->rst_info, restoration_level, - cm->frame_type == KEY_FRAME); - if (!cm->rst_info.restoration_used) - return; - start_mi_row = 0; - mi_rows_to_filter = cm->mi_rows; - if (partial_frame && cm->mi_rows > 8) { - start_mi_row = cm->mi_rows >> 1; - start_mi_row &= 0xfffffff8; - mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); + if (rsi->restoration_type != RESTORE_NONE) { + start_mi_row = 0; + mi_rows_to_filter = cm->mi_rows; + if (partial_frame && cm->mi_rows > 8) { + start_mi_row = cm->mi_rows >> 1; + start_mi_row &= 0xfffffff8; + mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); + } + end_mi_row = start_mi_row + mi_rows_to_filter; + vp10_loop_restoration_init(&cm->rst_internal, rsi, + cm->frame_type == KEY_FRAME); + vp10_loop_restoration_rows(frame, cm, start_mi_row, end_mi_row, y_only); } - end_mi_row = start_mi_row + mi_rows_to_filter; - vp10_loop_restoration_rows(frame, cm, start_mi_row, end_mi_row, y_only); } diff --git a/vp10/common/restoration.h b/vp10/common/restoration.h index 3859191f1..43b140e60 100644 --- a/vp10/common/restoration.h +++ b/vp10/common/restoration.h @@ -26,74 +26,54 @@ extern "C" { #define RESTORATION_LEVELS (1 << RESTORATION_LEVEL_BITS) #define DEF_RESTORATION_LEVEL 2 -#define RESTORATION_PRECISION 16 #define RESTORATION_HALFWIN 3 +#define RESTORATION_HALFWIN1 (RESTORATION_HALFWIN + 1) #define RESTORATION_WIN (2 * RESTORATION_HALFWIN + 1) +#define RESTORATION_WIN2 ((RESTORATION_WIN) * (RESTORATION_WIN)) -typedef struct restoration_params { - int 
sigma_x; // spatial variance x - int sigma_y; // spatial variance y - int sigma_r; // range variance -} restoration_params_t; +#define RESTORATION_FILT_BITS 7 +#define RESTORATION_FILT_STEP (1 << RESTORATION_FILT_BITS) -static restoration_params_t - restoration_level_to_params_arr[RESTORATION_LEVELS + 1] = { - // Values are rounded to 1/16 th precision - {0, 0, 0}, // 0 - default - {8, 9, 30}, - {9, 8, 30}, - {9, 11, 32}, - {11, 9, 32}, - {14, 14, 32}, - {18, 18, 36}, - {24, 24, 40}, - {32, 32, 40}, -}; +#define WIENER_FILT_TAP0_MINV 3 +#define WIENER_FILT_TAP1_MINV (-23) +#define WIENER_FILT_TAP2_MINV 5 -static restoration_params_t - restoration_level_to_params_arr_kf[RESTORATION_LEVELS_KF + 1] = { - // Values are rounded to 1/16 th precision - {0, 0, 0}, // 0 - default - {8, 8, 30}, - {9, 9, 32}, - {10, 10, 32}, - {12, 12, 32}, - {14, 14, 32}, - {18, 18, 36}, - {24, 24, 40}, - {30, 30, 44}, - {36, 36, 48}, - {42, 42, 48}, - {48, 48, 48}, - {48, 48, 56}, - {56, 56, 48}, - {56, 56, 56}, - {56, 56, 64}, - {64, 64, 48}, -}; +#define WIENER_FILT_TAP0_BITS 2 +#define WIENER_FILT_TAP1_BITS 4 +#define WIENER_FILT_TAP2_BITS 5 -typedef struct { - double *wx_lut[RESTORATION_WIN]; - double *wr_lut; - int restoration_sigma_x_set; - int restoration_sigma_y_set; - int restoration_sigma_r_set; - int restoration_used; -} restoration_info_n; +#define WIENER_FILT_TAP0_MAXV \ + (WIENER_FILT_TAP0_MINV -1 + (1 << WIENER_FILT_TAP0_BITS)) +#define WIENER_FILT_TAP1_MAXV \ + (WIENER_FILT_TAP1_MINV -1 + (1 << WIENER_FILT_TAP1_BITS)) +#define WIENER_FILT_TAP2_MAXV \ + (WIENER_FILT_TAP2_MINV -1 + (1 << WIENER_FILT_TAP2_BITS)) + +typedef enum { + RESTORE_NONE, + RESTORE_BILATERAL, + RESTORE_WIENER, +} RestorationType; -int vp10_restoration_level_bits(const struct VP10Common *const cm); -int vp10_loop_restoration_used(int level, int kf); +typedef struct { + RestorationType restoration_type; + int restoration_level; + int vfilter[RESTORATION_HALFWIN], hfilter[RESTORATION_HALFWIN]; +} 
RestorationInfo; -static INLINE restoration_params_t vp10_restoration_level_to_params( - int index, int kf) { - return kf ? restoration_level_to_params_arr_kf[index] : - restoration_level_to_params_arr[index]; -} +typedef struct { + RestorationType restoration_type; + uint8_t *wx_lut[RESTORATION_WIN]; + uint8_t *wr_lut; + int vfilter[RESTORATION_WIN], hfilter[RESTORATION_WIN]; +} RestorationInternal; -void vp10_loop_restoration_init(restoration_info_n *rst, int T, int kf); +int vp10_restoration_level_bits(const struct VP10Common *const cm); +void vp10_loop_restoration_init(RestorationInternal *rst, + RestorationInfo *rsi, int kf); void vp10_loop_restoration_frame(YV12_BUFFER_CONFIG *frame, struct VP10Common *cm, - int restoration_level, + RestorationInfo *rsi, int y_only, int partial_frame); void vp10_loop_restoration_rows(YV12_BUFFER_CONFIG *frame, struct VP10Common *cm, diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c index ec1a5fb78..8ed9d2cd0 100644 --- a/vp10/decoder/decodeframe.c +++ b/vp10/decoder/decodeframe.c @@ -2126,6 +2126,36 @@ static void setup_segmentation(VP10_COMMON *const cm, } } +#if CONFIG_LOOP_RESTORATION +static void setup_restoration(VP10_COMMON *cm, + struct vpx_read_bit_buffer *rb) { + RestorationInfo *rst = &cm->rst_info; + if (vpx_rb_read_bit(rb)) { + if (vpx_rb_read_bit(rb)) { + rst->restoration_type = RESTORE_BILATERAL; + rst->restoration_level = + vpx_rb_read_literal(rb, vp10_restoration_level_bits(cm)); + } else { + rst->restoration_type = RESTORE_WIENER; + rst->vfilter[0] = vpx_rb_read_literal(rb, WIENER_FILT_TAP0_BITS) + + WIENER_FILT_TAP0_MINV; + rst->vfilter[1] = vpx_rb_read_literal(rb, WIENER_FILT_TAP1_BITS) + + WIENER_FILT_TAP1_MINV; + rst->vfilter[2] = vpx_rb_read_literal(rb, WIENER_FILT_TAP2_BITS) + + WIENER_FILT_TAP2_MINV; + rst->hfilter[0] = vpx_rb_read_literal(rb, WIENER_FILT_TAP0_BITS) + + WIENER_FILT_TAP0_MINV; + rst->hfilter[1] = vpx_rb_read_literal(rb, WIENER_FILT_TAP1_BITS) + + 
WIENER_FILT_TAP1_MINV; + rst->hfilter[2] = vpx_rb_read_literal(rb, WIENER_FILT_TAP2_BITS) + + WIENER_FILT_TAP2_MINV; + } + } else { + rst->restoration_type = RESTORE_NONE; + } +} +#endif // CONFIG_LOOP_RESTORATION + static void setup_loopfilter(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) { struct loopfilter *lf = &cm->lf; @@ -2151,19 +2181,6 @@ static void setup_loopfilter(VP10_COMMON *cm, lf->mode_deltas[i] = vpx_rb_read_inv_signed_literal(rb, 6); } } -#if CONFIG_LOOP_RESTORATION - lf->restoration_level = vpx_rb_read_bit(rb); - if (lf->restoration_level) { - int level = vpx_rb_read_literal(rb, vp10_restoration_level_bits(cm)); - lf->restoration_level = level + (level >= lf->last_restoration_level); - } else { - lf->restoration_level = lf->last_restoration_level; - } - if (cm->frame_type != KEY_FRAME) - cm->lf.last_restoration_level = cm->lf.restoration_level; - else - cm->lf.last_restoration_level = 0; -#endif // CONFIG_LOOP_RESTORATION } static INLINE int read_delta_q(struct vpx_read_bit_buffer *rb) { @@ -3135,6 +3152,9 @@ static size_t read_uncompressed_header(VP10Decoder *pbi, vp10_setup_past_independence(cm); setup_loopfilter(cm, rb); +#if CONFIG_LOOP_RESTORATION + setup_restoration(cm, rb); +#endif // CONFIG_LOOP_RESTORATION setup_quantization(cm, rb); #if CONFIG_VP9_HIGHBITDEPTH xd->bd = (int)cm->bit_depth; @@ -3499,9 +3519,10 @@ void vp10_decode_frame(VP10Decoder *pbi, *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end); } #if CONFIG_LOOP_RESTORATION - vp10_loop_restoration_init(&cm->rst_info, cm->lf.restoration_level, - cm->frame_type == KEY_FRAME); - if (cm->rst_info.restoration_used) { + if (cm->rst_info.restoration_type != RESTORE_NONE) { + vp10_loop_restoration_init(&cm->rst_internal, + &cm->rst_info, + cm->frame_type == KEY_FRAME); vp10_loop_restoration_rows(new_fb, cm, 0, cm->mi_rows, 0); } #endif // CONFIG_LOOP_RESTORATION diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c index 73111c849..1c59b351f 100644 --- 
a/vp10/encoder/bitstream.c +++ b/vp10/encoder/bitstream.c @@ -1795,6 +1795,35 @@ static void update_coef_probs(VP10_COMP *cpi, vpx_writer* w) { } } +#if CONFIG_LOOP_RESTORATION +static void encode_restoration(VP10_COMMON *cm, + struct vpx_write_bit_buffer *wb) { + RestorationInfo *rst = &cm->rst_info; + vpx_wb_write_bit(wb, rst->restoration_type != RESTORE_NONE); + if (rst->restoration_type != RESTORE_NONE) { + if (rst->restoration_type == RESTORE_BILATERAL) { + vpx_wb_write_bit(wb, 1); + vpx_wb_write_literal(wb, rst->restoration_level, + vp10_restoration_level_bits(cm)); + } else { + vpx_wb_write_bit(wb, 0); + vpx_wb_write_literal( + wb, rst->vfilter[0] - WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP0_BITS); + vpx_wb_write_literal( + wb, rst->vfilter[1] - WIENER_FILT_TAP1_MINV, WIENER_FILT_TAP1_BITS); + vpx_wb_write_literal( + wb, rst->vfilter[2] - WIENER_FILT_TAP2_MINV, WIENER_FILT_TAP2_BITS); + vpx_wb_write_literal( + wb, rst->hfilter[0] - WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP0_BITS); + vpx_wb_write_literal( + wb, rst->hfilter[1] - WIENER_FILT_TAP1_MINV, WIENER_FILT_TAP1_BITS); + vpx_wb_write_literal( + wb, rst->hfilter[2] - WIENER_FILT_TAP2_MINV, WIENER_FILT_TAP2_BITS); + } + } +} +#endif // CONFIG_LOOP_RESTORATION + static void encode_loopfilter(VP10_COMMON *cm, struct vpx_write_bit_buffer *wb) { int i; @@ -1832,15 +1861,6 @@ static void encode_loopfilter(VP10_COMMON *cm, } } } -#if CONFIG_LOOP_RESTORATION - vpx_wb_write_bit(wb, lf->restoration_level != lf->last_restoration_level); - if (lf->restoration_level != lf->last_restoration_level) { - int level = lf->restoration_level - - (lf->restoration_level > lf->last_restoration_level); - vpx_wb_write_literal(wb, level, - vp10_restoration_level_bits(cm)); - } -#endif // CONFIG_LOOP_RESTORATION } static void write_delta_q(struct vpx_write_bit_buffer *wb, int delta_q) { @@ -2300,6 +2320,9 @@ static void write_uncompressed_header(VP10_COMP *cpi, vpx_wb_write_literal(wb, cm->frame_context_idx, FRAME_CONTEXTS_LOG2); 
encode_loopfilter(cm, wb); +#if CONFIG_LOOP_RESTORATION + encode_restoration(cm, wb); +#endif // CONFIG_LOOP_RESTORATION encode_quantization(cm, wb); encode_segmentation(cm, xd, wb); if (!cm->seg.enabled && xd->lossless[0]) diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c index 80bf47cc9..4c4261e2d 100644 --- a/vp10/encoder/encoder.c +++ b/vp10/encoder/encoder.c @@ -36,6 +36,9 @@ #include "vp10/encoder/firstpass.h" #include "vp10/encoder/mbgraph.h" #include "vp10/encoder/picklpf.h" +#if CONFIG_LOOP_RESTORATION +#include "vp10/encoder/pickrst.h" +#endif // CONFIG_LOOP_RESTORATION #include "vp10/encoder/ratectrl.h" #include "vp10/encoder/rd.h" #include "vp10/encoder/resize.h" @@ -2738,7 +2741,11 @@ static void loopfilter_frame(VP10_COMP *cpi, VP10_COMMON *cm) { vpx_usec_timer_start(&timer); +#if CONFIG_LOOP_RESTORATION + vp10_pick_filter_restoration(cpi->Source, cpi, cpi->sf.lpf_pick); +#else vp10_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick); +#endif // CONFIG_LOOP_RESTORATION vpx_usec_timer_mark(&timer); cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); @@ -2758,10 +2765,11 @@ static void loopfilter_frame(VP10_COMP *cpi, VP10_COMMON *cm) { #endif } #if CONFIG_LOOP_RESTORATION - vp10_loop_restoration_init(&cm->rst_info, cm->lf.restoration_level, - cm->frame_type == KEY_FRAME); - if (cm->rst_info.restoration_used) + if (cm->rst_info.restoration_type != RESTORE_NONE) { + vp10_loop_restoration_init(&cm->rst_internal, &cm->rst_info, + cm->frame_type == KEY_FRAME); vp10_loop_restoration_rows(cm->frame_to_show, cm, 0, cm->mi_rows, 0); + } #endif // CONFIG_LOOP_RESTORATION vpx_extend_frame_inner_borders(cm->frame_to_show); @@ -3871,12 +3879,6 @@ static void encode_frame_to_data_rate(VP10_COMP *cpi, cm->last2_frame_type = cm->last_frame_type; #endif // CONFIG_EXT_REFS cm->last_frame_type = cm->frame_type; -#if CONFIG_LOOP_RESTORATION - if (cm->frame_type != KEY_FRAME) - cm->lf.last_restoration_level = cm->lf.restoration_level; - else - 
cm->lf.last_restoration_level = 0; -#endif // CONFIG_LOOP_RESTORATION vp10_rc_postencode_update(cpi, *size); diff --git a/vp10/encoder/picklpf.c b/vp10/encoder/picklpf.c index 85735a48a..f116c0008 100644 --- a/vp10/encoder/picklpf.c +++ b/vp10/encoder/picklpf.c @@ -25,7 +25,7 @@ #include "vp10/encoder/picklpf.h" #include "vp10/encoder/quantize.h" -static int get_max_filter_level(const VP10_COMP *cpi) { +int vp10_get_max_filter_level(const VP10_COMP *cpi) { if (cpi->oxcf.pass == 2) { return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4 : MAX_LOOP_FILTER; @@ -34,8 +34,6 @@ static int get_max_filter_level(const VP10_COMP *cpi) { } } -#if !CONFIG_LOOP_RESTORATION -#if !JOINT_FILTER_RESTORATION_SEARCH static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP10_COMP *const cpi, int filt_level, int partial_frame) { @@ -71,191 +69,17 @@ static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd, return filt_err; } -#endif -#endif - -#if CONFIG_LOOP_RESTORATION -#define JOINT_FILTER_RESTORATION_SEARCH -#define USE_RD_LOOP_POSTFILTER_SEARCH -static int try_restoration_frame(const YV12_BUFFER_CONFIG *sd, - VP10_COMP *const cpi, - int restoration_level, - int partial_frame) { - VP10_COMMON *const cm = &cpi->common; - int filt_err; - vp10_loop_restoration_frame(cm->frame_to_show, cm, - restoration_level, 1, partial_frame); -#if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - filt_err = vp10_highbd_get_y_sse(sd, cm->frame_to_show); - } else { - filt_err = vp10_get_y_sse(sd, cm->frame_to_show); - } -#else - filt_err = vp10_get_y_sse(sd, cm->frame_to_show); -#endif // CONFIG_VP9_HIGHBITDEPTH - // Re-instate the unfiltered frame - vpx_yv12_copy_y(&cpi->last_frame_db, cm->frame_to_show); - return filt_err; -} - -static int search_restoration_level(const YV12_BUFFER_CONFIG *sd, - VP10_COMP *cpi, - int filter_level, int partial_frame, - double *best_cost_ret) { - VP10_COMMON *const cm = &cpi->common; - int i, restoration_best, err; - double 
best_cost; - double cost; - const int restoration_level_bits = vp10_restoration_level_bits(&cpi->common); - const int restoration_levels = 1 << restoration_level_bits; -#ifdef USE_RD_LOOP_POSTFILTER_SEARCH - MACROBLOCK *x = &cpi->td.mb; - int bits; -#endif - - // Make a copy of the unfiltered / processed recon buffer - vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf); - vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filter_level, - 1, partial_frame); - vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_db); - - restoration_best = 0; - err = try_restoration_frame(sd, cpi, 0, partial_frame); -#ifdef USE_RD_LOOP_POSTFILTER_SEARCH - bits = cm->lf.last_restoration_level == 0 ? 0 : restoration_level_bits; - cost = RDCOST_DBL(x->rdmult, x->rddiv, (bits << 2), err); -#else - cost = (double)err; -#endif // USE_RD_LOOP_POSTFILTER_SEARCH - best_cost = cost; - for (i = 1; i <= restoration_levels; ++i) { - err = try_restoration_frame(sd, cpi, i, partial_frame); -#ifdef USE_RD_LOOP_POSTFILTER_SEARCH - // Normally the rate is rate in bits * 256 and dist is sum sq err * 64 - // when RDCOST is used. However below we just scale both in the correct - // ratios appropriately but not exactly by these values. - bits = cm->lf.last_restoration_level == i ? 
0 : restoration_level_bits; - cost = RDCOST_DBL(x->rdmult, x->rddiv, (bits << 2), err); -#else - cost = (double)err; -#endif // USE_RD_LOOP_POSTFILTER_SEARCH - if (cost < best_cost) { - restoration_best = i; - best_cost = cost; - } - } - if (best_cost_ret) *best_cost_ret = best_cost; - vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); - return restoration_best; -} - -#ifdef JOINT_FILTER_RESTORATION_SEARCH -static int search_filter_restoration_level(const YV12_BUFFER_CONFIG *sd, - VP10_COMP *cpi, - int partial_frame, - int *restoration_level) { +int vp10_search_filter_level(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi, + int partial_frame, double *best_cost_ret) { const VP10_COMMON *const cm = &cpi->common; const struct loopfilter *const lf = &cm->lf; const int min_filter_level = 0; - const int max_filter_level = get_max_filter_level(cpi); - int filt_direction = 0; - int filt_best, restoration_best; - double best_err; - int i; - - // Start the search at the previous frame filter level unless it is now out of - // range. - int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level); - int filter_step = filt_mid < 16 ? 4 : filt_mid / 4; - double ss_err[MAX_LOOP_FILTER + 1]; - int bilateral; - - // Set each entry to -1 - for (i = 0; i <= MAX_LOOP_FILTER; ++i) - ss_err[i] = -1.0; - - bilateral = search_restoration_level(sd, cpi, filt_mid, - partial_frame, &best_err); - filt_best = filt_mid; - restoration_best = bilateral; - ss_err[filt_mid] = best_err; - - while (filter_step > 0) { - const int filt_high = VPXMIN(filt_mid + filter_step, max_filter_level); - const int filt_low = VPXMAX(filt_mid - filter_step, min_filter_level); - - // Bias against raising loop filter in favor of lowering it. 
- double bias = (best_err / (1 << (15 - (filt_mid / 8)))) * filter_step; - - if ((cpi->oxcf.pass == 2) && (cpi->twopass.section_intra_rating < 20)) - bias = (bias * cpi->twopass.section_intra_rating) / 20; - - // yx, bias less for large block size - if (cm->tx_mode != ONLY_4X4) - bias /= 2; - - if (filt_direction <= 0 && filt_low != filt_mid) { - // Get Low filter error score - if (ss_err[filt_low] < 0) { - bilateral = search_restoration_level(sd, cpi, filt_low, - partial_frame, - &ss_err[filt_low]); - } - // If value is close to the best so far then bias towards a lower loop - // filter value. - if ((ss_err[filt_low] - bias) < best_err) { - // Was it actually better than the previous best? - if (ss_err[filt_low] < best_err) { - best_err = ss_err[filt_low]; - } - - filt_best = filt_low; - restoration_best = bilateral; - } - } - - // Now look at filt_high - if (filt_direction >= 0 && filt_high != filt_mid) { - if (ss_err[filt_high] < 0) { - bilateral = search_restoration_level(sd, cpi, filt_high, partial_frame, - &ss_err[filt_high]); - } - // Was it better than the previous best? - if (ss_err[filt_high] < (best_err - bias)) { - best_err = ss_err[filt_high]; - filt_best = filt_high; - restoration_best = bilateral; - } - } - - // Half the step distance if the best filter value was the same as last time - if (filt_best == filt_mid) { - filter_step /= 2; - filt_direction = 0; - } else { - filt_direction = (filt_best < filt_mid) ? 
-1 : 1; - filt_mid = filt_best; - } - } - *restoration_level = restoration_best; - return filt_best; -} -#endif // JOINT_FILTER_RESTORATION_SEARCH -#endif // CONFIG_LOOP_RESTORATION - -#if !CONFIG_LOOP_RESTORATION -#if !JOINT_FILTER_RESTORATION_SEARCH -static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi, - int partial_frame) { - const VP10_COMMON *const cm = &cpi->common; - const struct loopfilter *const lf = &cm->lf; - const int min_filter_level = 0; - const int max_filter_level = get_max_filter_level(cpi); + const int max_filter_level = vp10_get_max_filter_level(cpi); int filt_direction = 0; int64_t best_err; int filt_best; + MACROBLOCK *x = &cpi->td.mb; // Start the search at the previous frame filter level unless it is now out of // range. @@ -325,12 +149,12 @@ static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi, filt_mid = filt_best; } } - + if (best_cost_ret) + *best_cost_ret = RDCOST_DBL(x->rdmult, x->rddiv, 0, best_err); return filt_best; } -#endif -#endif +#if !CONFIG_LOOP_RESTORATION void vp10_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi, LPF_PICK_METHOD method) { VP10_COMMON *const cm = &cpi->common; @@ -343,7 +167,7 @@ void vp10_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi, lf->filter_level = 0; } else if (method >= LPF_PICK_FROM_Q) { const int min_filter_level = 0; - const int max_filter_level = get_max_filter_level(cpi); + const int max_filter_level = vp10_get_max_filter_level(cpi); const int q = vp10_ac_quant(cm->base_qindex, 0, cm->bit_depth); // These values were determined by linear fitting the result of the // searched level, filt_guess = q * 0.316206 + 3.87252 @@ -370,24 +194,9 @@ void vp10_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi, if (cm->frame_type == KEY_FRAME) filt_guess -= 4; lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level); -#if CONFIG_LOOP_RESTORATION - lf->restoration_level = search_restoration_level( - sd, 
cpi, lf->filter_level, method == LPF_PICK_FROM_SUBIMAGE, NULL); -#endif // CONFIG_LOOP_RESTORATION } else { -#if CONFIG_LOOP_RESTORATION -#ifdef JOINT_FILTER_RESTORATION_SEARCH - lf->filter_level = search_filter_restoration_level( - sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, &lf->restoration_level); -#else - lf->filter_level = search_filter_level( - sd, cpi, method == LPF_PICK_FROM_SUBIMAGE); - lf->restoration_level = search_restoration_level( - sd, cpi, lf->filter_level, method == LPF_PICK_FROM_SUBIMAGE, NULL); -#endif // JOINT_FILTER_RESTORATION_SEARCH -#else - lf->filter_level = search_filter_level( - sd, cpi, method == LPF_PICK_FROM_SUBIMAGE); -#endif // CONFIG_LOOP_RESTORATION + lf->filter_level = vp10_search_filter_level( + sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL); } } +#endif // !CONFIG_LOOP_RESTORATION diff --git a/vp10/encoder/picklpf.h b/vp10/encoder/picklpf.h index 21a8758ef..29ec97609 100644 --- a/vp10/encoder/picklpf.h +++ b/vp10/encoder/picklpf.h @@ -20,9 +20,11 @@ extern "C" { struct yv12_buffer_config; struct VP10_COMP; - +int vp10_get_max_filter_level(const VP10_COMP *cpi); +int vp10_search_filter_level(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi, + int partial_frame, double *err); void vp10_pick_filter_level(const struct yv12_buffer_config *sd, - struct VP10_COMP *cpi, LPF_PICK_METHOD method); + struct VP10_COMP *cpi, LPF_PICK_METHOD method); #ifdef __cplusplus } // extern "C" #endif diff --git a/vp10/encoder/pickrst.c b/vp10/encoder/pickrst.c new file mode 100644 index 000000000..79cda43ab --- /dev/null +++ b/vp10/encoder/pickrst.c @@ -0,0 +1,594 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> +#include <limits.h> +#include <math.h> + +#include "./vpx_scale_rtcd.h" + +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" + +#include "vp10/common/onyxc_int.h" +#include "vp10/common/quant_common.h" + +#include "vp10/encoder/encoder.h" +#include "vp10/encoder/quantize.h" +#include "vp10/encoder/picklpf.h" +#include "vp10/encoder/pickrst.h" + +static int try_restoration_frame(const YV12_BUFFER_CONFIG *sd, + VP10_COMP *const cpi, + RestorationInfo *rsi, + int partial_frame) { + VP10_COMMON *const cm = &cpi->common; + int filt_err; + vp10_loop_restoration_frame(cm->frame_to_show, cm, + rsi, 1, partial_frame); +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + filt_err = vp10_highbd_get_y_sse(sd, cm->frame_to_show); + } else { + filt_err = vp10_get_y_sse(sd, cm->frame_to_show); + } +#else + filt_err = vp10_get_y_sse(sd, cm->frame_to_show); +#endif // CONFIG_VP9_HIGHBITDEPTH + + // Re-instate the unfiltered frame + vpx_yv12_copy_y(&cpi->last_frame_db, cm->frame_to_show); + return filt_err; +} + +static int search_bilateral_level(const YV12_BUFFER_CONFIG *sd, + VP10_COMP *cpi, + int filter_level, int partial_frame, + double *best_cost_ret) { + VP10_COMMON *const cm = &cpi->common; + int i, restoration_best, err; + double best_cost; + double cost; + const int restoration_level_bits = vp10_restoration_level_bits(&cpi->common); + const int restoration_levels = 1 << restoration_level_bits; + MACROBLOCK *x = &cpi->td.mb; + int bits; + RestorationInfo rsi; + + // Make a copy of the unfiltered / processed recon buffer + vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf); + vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filter_level, + 1, partial_frame); + vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_db); + + restoration_best = -1; + rsi.restoration_type = RESTORE_NONE; + err = 
try_restoration_frame(sd, cpi, &rsi, partial_frame); + bits = 0; + best_cost = RDCOST_DBL(x->rdmult, x->rddiv, (bits << 2), err); + for (i = 0; i < restoration_levels; ++i) { + rsi.restoration_type = RESTORE_BILATERAL; + rsi.restoration_level = i; + err = try_restoration_frame(sd, cpi, &rsi, partial_frame); + // Normally the rate is rate in bits * 256 and dist is sum sq err * 64 + // when RDCOST is used. However below we just scale both in the correct + // ratios appropriately but not exactly by these values. + bits = restoration_level_bits; + cost = RDCOST_DBL(x->rdmult, x->rddiv, (bits << 2), err); + if (cost < best_cost) { + restoration_best = i; + best_cost = cost; + } + } + if (best_cost_ret) *best_cost_ret = best_cost; + vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); + return restoration_best; +} + +static int search_filter_bilateral_level(const YV12_BUFFER_CONFIG *sd, + VP10_COMP *cpi, + int partial_frame, + int *restoration_level, + double *best_cost_ret) { + const VP10_COMMON *const cm = &cpi->common; + const struct loopfilter *const lf = &cm->lf; + const int min_filter_level = 0; + const int max_filter_level = vp10_get_max_filter_level(cpi); + int filt_direction = 0; + int filt_best, restoration_best; + double best_err; + int i; + int bilateral_lev; + + // Start the search at the previous frame filter level unless it is now out of + // range. + int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level); + int filter_step = filt_mid < 16 ? 
4 : filt_mid / 4; + double ss_err[MAX_LOOP_FILTER + 1]; + + // Set each entry to -1 + for (i = 0; i <= MAX_LOOP_FILTER; ++i) + ss_err[i] = -1.0; + + bilateral_lev = search_bilateral_level(sd, cpi, filt_mid, + partial_frame, &best_err); + filt_best = filt_mid; + restoration_best = bilateral_lev; + ss_err[filt_mid] = best_err; + + while (filter_step > 0) { + const int filt_high = VPXMIN(filt_mid + filter_step, max_filter_level); + const int filt_low = VPXMAX(filt_mid - filter_step, min_filter_level); + + // Bias against raising loop filter in favor of lowering it. + double bias = (best_err / (1 << (15 - (filt_mid / 8)))) * filter_step; + + if ((cpi->oxcf.pass == 2) && (cpi->twopass.section_intra_rating < 20)) + bias = (bias * cpi->twopass.section_intra_rating) / 20; + + // yx, bias less for large block size + if (cm->tx_mode != ONLY_4X4) + bias /= 2; + + if (filt_direction <= 0 && filt_low != filt_mid) { + // Get Low filter error score + if (ss_err[filt_low] < 0) { + bilateral_lev = search_bilateral_level(sd, cpi, filt_low, + partial_frame, &ss_err[filt_low]); + } + // If value is close to the best so far then bias towards a lower loop + // filter value. + if ((ss_err[filt_low] - bias) < best_err) { + // Was it actually better than the previous best? + if (ss_err[filt_low] < best_err) { + best_err = ss_err[filt_low]; + } + + filt_best = filt_low; + restoration_best = bilateral_lev; + } + } + + // Now look at filt_high + if (filt_direction >= 0 && filt_high != filt_mid) { + if (ss_err[filt_high] < 0) { + bilateral_lev = search_bilateral_level( + sd, cpi, filt_high, partial_frame, &ss_err[filt_high]); + } + // Was it better than the previous best? 
+ if (ss_err[filt_high] < (best_err - bias)) { + best_err = ss_err[filt_high]; + filt_best = filt_high; + restoration_best = bilateral_lev; + } + } + + // Half the step distance if the best filter value was the same as last time + if (filt_best == filt_mid) { + filter_step /= 2; + filt_direction = 0; + } else { + filt_direction = (filt_best < filt_mid) ? -1 : 1; + filt_mid = filt_best; + } + } + *restoration_level = restoration_best; + if (best_cost_ret) *best_cost_ret = best_err; + return filt_best; +} + +static double find_average(uint8_t *src, int width, int height, int stride) { + uint64_t sum = 0; + double avg = 0; + int i, j; + for (i = 0; i < height; i++) + for (j = 0; j < width; j++) + sum += src[i * stride + j]; + avg = (double)sum / (height * width); + return avg; +} + +static void compute_stats(uint8_t *dgd, uint8_t *src, int width, int height, + int dgd_stride, int src_stride, + double *M, double *H) { + int i, j, k, l; + double Y[RESTORATION_WIN2]; + const double avg = find_average(dgd, width, height, dgd_stride); + + memset(M, 0, sizeof(*M) * RESTORATION_WIN2); + memset(H, 0, sizeof(*H) * RESTORATION_WIN2 * RESTORATION_WIN2); + for (i = RESTORATION_HALFWIN; i < height - RESTORATION_HALFWIN; i++) { + for (j = RESTORATION_HALFWIN; j < width - RESTORATION_HALFWIN; j++) { + const double X = (double)src[i * src_stride + j] - avg; + int idx = 0; + for (k = -RESTORATION_HALFWIN; k <= RESTORATION_HALFWIN; k++) { + for (l = -RESTORATION_HALFWIN; l <= RESTORATION_HALFWIN; l++) { + Y[idx] = (double)dgd[(i + l) * dgd_stride + (j + k)] - avg; + idx++; + } + } + for (k = 0; k < RESTORATION_WIN2; ++k) { + M[k] += Y[k] * X; + H[k * RESTORATION_WIN2 + k] += Y[k] * Y[k]; + for (l = k + 1; l < RESTORATION_WIN2; ++l) { + double value = Y[k] * Y[l]; + H[k * RESTORATION_WIN2 + l] += value; + H[l * RESTORATION_WIN2 + k] += value; + } + } + } + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +static double find_average_highbd(uint16_t *src, + int width, int height, int stride) { + 
uint64_t sum = 0; + double avg = 0; + int i, j; + for (i = 0; i < height; i++) + for (j = 0; j < width; j++) + sum += src[i * stride + j]; + avg = (double)sum / (height * width); + return avg; +} + +static void compute_stats_highbd( + uint8_t *dgd8, uint8_t *src8, int width, int height, + int dgd_stride, int src_stride, double *M, double *H) { + int i, j, k, l; + double Y[RESTORATION_WIN2]; + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8); + const double avg = find_average_highbd(dgd, width, height, dgd_stride); + + memset(M, 0, sizeof(*M) * RESTORATION_WIN2); + memset(H, 0, sizeof(*H) * RESTORATION_WIN2 * RESTORATION_WIN2); + for (i = RESTORATION_HALFWIN; i < height - RESTORATION_HALFWIN; i++) { + for (j = RESTORATION_HALFWIN; j < width - RESTORATION_HALFWIN; j++) { + const double X = (double)src[i * src_stride + j] - avg; + int idx = 0; + for (k = -RESTORATION_HALFWIN; k <= RESTORATION_HALFWIN; k++) { + for (l = -RESTORATION_HALFWIN; l <= RESTORATION_HALFWIN; l++) { + Y[idx] = (double)dgd[(i + l) * dgd_stride + (j + k)] - avg; + idx++; + } + } + for (k = 0; k < RESTORATION_WIN2; ++k) { + M[k] += Y[k] * X; + H[k * RESTORATION_WIN2 + k] += Y[k] * Y[k]; + for (l = k + 1; l < RESTORATION_WIN2; ++l) { + double value = Y[k] * Y[l]; + H[k * RESTORATION_WIN2 + l] += value; + H[l * RESTORATION_WIN2 + k] += value; + } + } + } + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +// Solves Ax = b, where x and b are column vectors +static int linsolve(int n, double *A, int stride, double *b, double *x) { + int i, j, k; + double c; + // Partial pivoting + for (i = n - 1; i > 0; i--) { + if (A[(i - 1) * stride] < A[i * stride]) { + for (j = 0; j < n; j++) { + c = A[i * stride + j]; + A[i * stride + j] = A[(i - 1) * stride + j]; + A[(i - 1) * stride + j] = c; + } + c = b[i]; + b[i] = b[i - 1]; + b[i - 1] = c; + } + } + // Forward elimination + for (k = 0; k < n - 1; k++) { + for (i = k; i < n - 1; i++) { + c = A[(i + 1) * stride + k] / A[k * 
stride + k]; + for (j = 0; j < n; j++) + A[(i + 1) * stride + j] -= c * A[k * stride + j]; + b[i + 1] -= c * b[k]; + } + } + // Backward substitution + for (i = n - 1; i >= 0; i--) { + if (fabs(A[i * stride + i]) < 1e-10) + return 0; + c = 0; + for (j = i + 1; j <= n - 1; j++) + c += A[i * stride + j] * x[j]; + x[i] = (b[i] - c) / A[i * stride + i]; + } + return 1; +} + +static INLINE int wrap_index(int i) { + return (i >= RESTORATION_HALFWIN1 ? RESTORATION_WIN - 1 - i : i); +} + +static void normalize_copy(double *v, int n) { + double s = 0.0; + int i; + for (i = 0; i < n; ++i) + s += v[i]; + s = 1.0 / s; + for (i = 0; i < n; ++i) v[i] *= s; +} + +// Fix vector b, update vector a +static void update_a_sep_sym(double **Mc, double **Hc, double *a, double *b) { + int i, j; + double S[RESTORATION_WIN]; + double A[RESTORATION_WIN], B[RESTORATION_WIN2]; + memset(A, 0, sizeof(A)); + memset(B, 0, sizeof(B)); + for (i = 0; i < RESTORATION_WIN; i ++) { + int j; + for (j = 0; j < RESTORATION_WIN; ++j) { + const int jj = wrap_index(j); + A[jj] += Mc[i][j] * b[i]; + } + } + + for (i = 0; i < RESTORATION_WIN; i ++) { + for (j = 0; j < RESTORATION_WIN; j ++) { + int k, l; + for (k = 0; k < RESTORATION_WIN; ++k) + for (l = 0; l < RESTORATION_WIN; ++l) { + const int kk = wrap_index(k); + const int ll = wrap_index(l); + B[ll * RESTORATION_HALFWIN1 + kk] += + Hc[j * RESTORATION_WIN + i][k * RESTORATION_WIN2 + l] * + b[i] * b[j]; + } + } + } + if (linsolve(RESTORATION_HALFWIN1, B, RESTORATION_HALFWIN1, A, S)) { + for (i = 0; i < RESTORATION_WIN; ++i) { + const int ii = wrap_index(i); + a[i] = S[ii]; + } + normalize_copy(a, RESTORATION_WIN); + } +} + +// Fix vector a, update vector b +static void update_b_sep_sym(double **Mc, double **Hc, double *a, double *b) { + int i, j; + double S[RESTORATION_WIN]; + double A[RESTORATION_WIN], B[RESTORATION_WIN2]; + memset(A, 0, sizeof(A)); + memset(B, 0, sizeof(B)); + for (i = 0; i < RESTORATION_WIN; i ++) { + int j; + const int ii = 
wrap_index(i); + for (j = 0; j < RESTORATION_WIN; j ++) + A[ii] += Mc[i][j] * a[j]; + } + + for (i = 0; i < RESTORATION_WIN; i++) { + for (j = 0; j < RESTORATION_WIN; j++) { + const int ii = wrap_index(i); + const int jj = wrap_index(j); + int k, l; + for (k = 0; k < RESTORATION_WIN; ++k) + for (l = 0; l < RESTORATION_WIN; ++l) + B[jj * RESTORATION_HALFWIN1 + ii] += + Hc[i * RESTORATION_WIN + j][k * RESTORATION_WIN2 + l] * + a[k] * a[l]; + } + } + if (linsolve(RESTORATION_HALFWIN1, B, RESTORATION_HALFWIN1, A, S)) { + for (i = 0; i < RESTORATION_WIN; ++i) { + const int ii = wrap_index(i); + b[i] = S[ii]; + } + normalize_copy(b, RESTORATION_WIN); + } +} + +static void wiener_decompose_sep_sym(double *M, double *H, + double *a, double *b) { + static const double init_filt[RESTORATION_WIN] = { + 0.035623, -0.127154, 0.211436, 0.760190, 0.211436, -0.127154, 0.035623, + }; + int i, j, iter; + double *Hc[RESTORATION_WIN2]; + double *Mc[RESTORATION_WIN]; + for (i = 0; i < RESTORATION_WIN; i++) { + Mc[i] = M + i * RESTORATION_WIN; + for (j = 0; j < RESTORATION_WIN; j++) { + Hc[i * RESTORATION_WIN + j] = + H + i * RESTORATION_WIN * RESTORATION_WIN2 + j * RESTORATION_WIN; + } + } + memcpy(a, init_filt, sizeof(*a) * RESTORATION_WIN); + memcpy(b, init_filt, sizeof(*b) * RESTORATION_WIN); + + iter = 1; + while (iter < 10) { + update_a_sep_sym(Mc, Hc, a, b); + update_b_sep_sym(Mc, Hc, a, b); + iter++; + } +} + +#define CLIP(x, lo, hi) ((x) < (lo) ? (lo) : (x) > (hi) ? (hi) : (x)) +#define RINT(x) ((x) < 0 ? 
(int)((x) - 0.5) : (int)((x) + 0.5)) + +static void quantize_sym_filter(double *f, int *fi) { + int i; + for (i = 0; i < RESTORATION_HALFWIN; ++i) { + fi[i] = RINT(f[i] * RESTORATION_FILT_STEP); + } + // Specialize for 7-tap filter + fi[0] = CLIP(fi[0], WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP0_MAXV); + fi[1] = CLIP(fi[1], WIENER_FILT_TAP1_MINV, WIENER_FILT_TAP1_MAXV); + fi[2] = CLIP(fi[2], WIENER_FILT_TAP2_MINV, WIENER_FILT_TAP2_MAXV); +} + +static int search_wiener_filter(const YV12_BUFFER_CONFIG *src, + VP10_COMP *cpi, + int filter_level, + int partial_frame, + int *vfilter, int *hfilter, + double *best_cost_ret) { + VP10_COMMON *const cm = &cpi->common; + RestorationInfo rsi; + int err, bits; + double cost_wiener, cost_norestore; + MACROBLOCK *x = &cpi->td.mb; + double M[RESTORATION_WIN2]; + double H[RESTORATION_WIN2 * RESTORATION_WIN2]; + double vfilterd[RESTORATION_WIN], hfilterd[RESTORATION_WIN]; + const YV12_BUFFER_CONFIG *dgd = cm->frame_to_show; + const int width = cm->width; + const int height = cm->height; + const int src_stride = src->y_stride; + const int dgd_stride = dgd->y_stride; + + assert(width == dgd->y_crop_width); + assert(height == dgd->y_crop_height); + assert(width == src->y_crop_width); + assert(height == src->y_crop_height); + + // Make a copy of the unfiltered / processed recon buffer + vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf); + vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filter_level, + 1, partial_frame); + vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_db); + + rsi.restoration_type = RESTORE_NONE; + err = try_restoration_frame(src, cpi, &rsi, partial_frame); + bits = 0; + cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits << 2), err); + +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) + compute_stats_highbd(dgd->y_buffer, src->y_buffer, width, height, + dgd_stride, src_stride, M, H); + else +#endif // CONFIG_VP9_HIGHBITDEPTH + compute_stats(dgd->y_buffer, src->y_buffer, width, height, 
+ dgd_stride, src_stride, M, H); + + wiener_decompose_sep_sym(M, H, vfilterd, hfilterd); + quantize_sym_filter(vfilterd, vfilter); + quantize_sym_filter(hfilterd, hfilter); + + rsi.restoration_type = RESTORE_WIENER; + memcpy(rsi.vfilter, vfilter, sizeof(rsi.vfilter)); + memcpy(rsi.hfilter, hfilter, sizeof(rsi.hfilter)); + err = try_restoration_frame(src, cpi, &rsi, partial_frame); + bits = 22; + cost_wiener = RDCOST_DBL(x->rdmult, x->rddiv, (bits << 2), err); + + vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); + + if (cost_wiener < cost_norestore) { + if (best_cost_ret) *best_cost_ret = cost_wiener; + return 1; + } else { + if (best_cost_ret) *best_cost_ret = cost_norestore; + return 0; + } +} + +void vp10_pick_filter_restoration( + const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi, LPF_PICK_METHOD method) { + VP10_COMMON *const cm = &cpi->common; + struct loopfilter *const lf = &cm->lf; + int wiener_success; + double cost_bilateral = 1e12; + double cost_wiener = 1e12; + double cost_norestore = 1e12; + + lf->sharpness_level = + cm->frame_type == KEY_FRAME ? 
0 : cpi->oxcf.sharpness; + + if (method == LPF_PICK_MINIMAL_LPF && lf->filter_level) { + lf->filter_level = 0; + } else if (method >= LPF_PICK_FROM_Q) { + const int min_filter_level = 0; + const int max_filter_level = vp10_get_max_filter_level(cpi); + const int q = vp10_ac_quant(cm->base_qindex, 0, cm->bit_depth); + // These values were determined by linear fitting the result of the + // searched level, filt_guess = q * 0.316206 + 3.87252 +#if CONFIG_VP9_HIGHBITDEPTH + int filt_guess; + switch (cm->bit_depth) { + case VPX_BITS_8: + filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18); + break; + case VPX_BITS_10: + filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 4060632, 20); + break; + case VPX_BITS_12: + filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 16242526, 22); + break; + default: + assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 " + "or VPX_BITS_12"); + return; + } +#else + int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18); +#endif // CONFIG_VP9_HIGHBITDEPTH + if (cm->frame_type == KEY_FRAME) + filt_guess -= 4; + lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level); + cm->rst_info.restoration_level = search_bilateral_level( + sd, cpi, lf->filter_level, method == LPF_PICK_FROM_SUBIMAGE, + &cost_bilateral); + wiener_success = search_wiener_filter( + sd, cpi, lf->filter_level, method == LPF_PICK_FROM_SUBIMAGE, + cm->rst_info.vfilter, cm->rst_info.hfilter, &cost_wiener); + if (cost_bilateral < cost_wiener) { + if (cm->rst_info.restoration_level != -1) + cm->rst_info.restoration_type = RESTORE_BILATERAL; + else + cm->rst_info.restoration_type = RESTORE_NONE; + } else { + if (wiener_success) + cm->rst_info.restoration_type = RESTORE_WIENER; + else + cm->rst_info.restoration_type = RESTORE_NONE; + } + } else { + int blf_filter_level = -1; + blf_filter_level = search_filter_bilateral_level( + sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, + &cm->rst_info.restoration_level, &cost_bilateral); + lf->filter_level = 
vp10_search_filter_level( + sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, &cost_norestore); + wiener_success = search_wiener_filter( + sd, cpi, lf->filter_level, method == LPF_PICK_FROM_SUBIMAGE, + cm->rst_info.vfilter, cm->rst_info.hfilter, &cost_wiener); + // printf("Costs %g %g (%d) %g\n", + // cost_norestore, cost_bilateral, lf->filter_level, cost_wiener); + if (cost_bilateral < cost_wiener) { + lf->filter_level = blf_filter_level; + if (cm->rst_info.restoration_level != -1) + cm->rst_info.restoration_type = RESTORE_BILATERAL; + else + cm->rst_info.restoration_type = RESTORE_NONE; + } else { + if (wiener_success) + cm->rst_info.restoration_type = RESTORE_WIENER; + else + cm->rst_info.restoration_type = RESTORE_NONE; + } + } +} diff --git a/vp10/encoder/pickrst.h b/vp10/encoder/pickrst.h new file mode 100644 index 000000000..8e2340dfc --- /dev/null +++ b/vp10/encoder/pickrst.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#ifndef VP10_ENCODER_PICKRST_H_ +#define VP10_ENCODER_PICKRST_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "vp10/encoder/encoder.h" + +struct yv12_buffer_config; +struct VP10_COMP; + +void vp10_pick_filter_restoration( + const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi, LPF_PICK_METHOD method); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP10_ENCODER_PICKRST_H_ diff --git a/vp10/vp10cx.mk b/vp10/vp10cx.mk index 7ae2fb2d2..8d35a9be7 100644 --- a/vp10/vp10cx.mk +++ b/vp10/vp10cx.mk @@ -57,6 +57,8 @@ VP10_CX_SRCS-yes += encoder/palette.h VP10_CX_SRCS-yes += encoder/palette.c VP10_CX_SRCS-yes += encoder/picklpf.c VP10_CX_SRCS-yes += encoder/picklpf.h +VP10_CX_SRCS-$(CONFIG_LOOP_RESTORATION) += encoder/pickrst.c +VP10_CX_SRCS-$(CONFIG_LOOP_RESTORATION) += encoder/pickrst.h VP10_CX_SRCS-yes += encoder/quantize.c VP10_CX_SRCS-yes += encoder/ratectrl.c VP10_CX_SRCS-yes += encoder/rd.c