From 84ca7a9f0f6ad793c16428d18fa6c5d5a8ae0f37 Mon Sep 17 00:00:00 2001 From: Debargha Mukherjee Date: Tue, 19 Jan 2016 13:01:01 -0800 Subject: [PATCH] Loop restoration filter Current implementation is a bilateral filter whose parameters are transmitted in the bitstream. derflr: -0.647% BDRATE hevcmr: -0.794% BDRATE This is a preliminary patch. Various other variations are to be investigated next, that will hopefully be less expensive on the decoder side. Change-Id: I50634ae8f5014ad0bf7432306348908a349d81e1 --- vp10/common/alloccommon.c | 6 + vp10/common/alloccommon.h | 3 + vp10/common/entropymode.c | 4 + vp10/common/loopfilter.c | 274 +++++++++++++++++++++++++++++++++++++ vp10/common/loopfilter.h | 94 +++++++++++++ vp10/common/onyxc_int.h | 3 + vp10/decoder/decodeframe.c | 25 +++- vp10/encoder/bitstream.c | 14 +- vp10/encoder/encoder.c | 33 ++++- vp10/encoder/encoder.h | 3 + vp10/encoder/picklpf.c | 200 ++++++++++++++++++++++++++- vp10/encoder/rd.h | 4 + vp10/vp10_dx_iface.c | 3 + 13 files changed, 655 insertions(+), 11 deletions(-) diff --git a/vp10/common/alloccommon.c b/vp10/common/alloccommon.c index 364afde47..e14aee76f 100644 --- a/vp10/common/alloccommon.c +++ b/vp10/common/alloccommon.c @@ -81,6 +81,12 @@ void vp10_free_ref_frame_buffers(BufferPool *pool) { } } +#if CONFIG_LOOP_RESTORATION +void vp10_free_restoration_buffers(VP10_COMMON *cm) { + vpx_free_frame_buffer(&cm->tmp_loop_buf); +} +#endif // CONFIG_LOOP_RESTORATION + void vp10_free_postproc_buffers(VP10_COMMON *cm) { #if CONFIG_VP9_POSTPROC vpx_free_frame_buffer(&cm->post_proc_buffer); diff --git a/vp10/common/alloccommon.h b/vp10/common/alloccommon.h index 5cfe6602d..f77833b7d 100644 --- a/vp10/common/alloccommon.h +++ b/vp10/common/alloccommon.h @@ -29,6 +29,9 @@ void vp10_free_context_buffers(struct VP10Common *cm); void vp10_free_ref_frame_buffers(struct BufferPool *pool); void vp10_free_postproc_buffers(struct VP10Common *cm); +#if CONFIG_LOOP_RESTORATION +void
vp10_free_restoration_buffers(struct VP10Common *cm); +#endif // CONFIG_LOOP_RESTORATION int vp10_alloc_state_buffers(struct VP10Common *cm, int width, int height); void vp10_free_state_buffers(struct VP10Common *cm); diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c index 92f00c485..3e1068065 100644 --- a/vp10/common/entropymode.c +++ b/vp10/common/entropymode.c @@ -1498,6 +1498,10 @@ void vp10_setup_past_independence(VP10_COMMON *cm) { // To force update of the sharpness lf->last_sharpness_level = -1; +#if CONFIG_LOOP_RESTORATION + lf->bilateral_level = 0; + lf->last_bilateral_level = 0; +#endif // CONFIG_LOOP_RESTORATION vp10_default_coef_probs(cm); init_mode_probs(cm->fc); diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c index 875030d0a..380312e6c 100644 --- a/vp10/common/loopfilter.c +++ b/vp10/common/loopfilter.c @@ -8,6 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include <math.h> + #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vp10/common/loopfilter.h" @@ -212,6 +214,250 @@ static const int mode_lf_lut[MB_MODE_COUNT] = { #endif // CONFIG_EXT_INTER }; +#if CONFIG_LOOP_RESTORATION +#define BILATERAL_RANGE 256 +#define BILATERAL_RANGE_SYM (2 * BILATERAL_RANGE + 1) +static double bilateral_filters_r_kf[BILATERAL_LEVELS_KF + 1] + [BILATERAL_RANGE_SYM]; +static double bilateral_filters_r[BILATERAL_LEVELS + 1][BILATERAL_RANGE_SYM]; +static double bilateral_filters_s_kf[BILATERAL_LEVELS_KF + 1] + [BILATERAL_WIN][BILATERAL_WIN]; +static double bilateral_filters_s[BILATERAL_LEVELS + 1] + [BILATERAL_WIN][BILATERAL_WIN]; + +void vp10_loop_bilateral_precal() { + int i; + for (i = 1; i < BILATERAL_LEVELS_KF + 1; i ++) { + const bilateral_params_t param = vp10_bilateral_level_to_params(i, 1); + const int sigma_x = param.sigma_x; + const int sigma_y = param.sigma_y; + const int sigma_r = param.sigma_r; + const double sigma_r_d = (double)sigma_r / BILATERAL_PRECISION; + const double sigma_x_d =
(double)sigma_x / BILATERAL_PRECISION; + const double sigma_y_d = (double)sigma_y / BILATERAL_PRECISION; + + double *fr = bilateral_filters_r_kf[i] + BILATERAL_RANGE; + int j, x, y; + for (j = 0; j <= BILATERAL_RANGE; j++) { + fr[j] = exp(-(j * j) / (2 * sigma_r_d * sigma_r_d)); + fr[-j] = fr[j]; + } + for (y = -BILATERAL_HALFWIN; y <= BILATERAL_HALFWIN; y++) { + for (x = -BILATERAL_HALFWIN; x <= BILATERAL_HALFWIN; x++) { + bilateral_filters_s_kf[i][y + BILATERAL_HALFWIN] + [x + BILATERAL_HALFWIN] = + exp(-(x * x) / (2 * sigma_x_d * sigma_x_d) + -(y * y) / (2 * sigma_y_d * sigma_y_d)); + } + } + } + for (i = 1; i < BILATERAL_LEVELS + 1; i ++) { + const bilateral_params_t param = vp10_bilateral_level_to_params(i, 0); + const int sigma_x = param.sigma_x; + const int sigma_y = param.sigma_y; + const int sigma_r = param.sigma_r; + const double sigma_r_d = (double)sigma_r / BILATERAL_PRECISION; + const double sigma_x_d = (double)sigma_x / BILATERAL_PRECISION; + const double sigma_y_d = (double)sigma_y / BILATERAL_PRECISION; + + double *fr = bilateral_filters_r[i] + BILATERAL_RANGE; + int j, x, y; + for (j = 0; j <= BILATERAL_RANGE; j++) { + fr[j] = exp(-(j * j) / (2 * sigma_r_d * sigma_r_d)); + fr[-j] = fr[j]; + } + for (y = -BILATERAL_HALFWIN; y <= BILATERAL_HALFWIN; y++) { + for (x = -BILATERAL_HALFWIN; x <= BILATERAL_HALFWIN; x++) { + bilateral_filters_s[i][y + BILATERAL_HALFWIN][x + BILATERAL_HALFWIN] = + exp(-(x * x) / (2 * sigma_x_d * sigma_x_d) + -(y * y) / (2 * sigma_y_d * sigma_y_d)); + } + } + } +} + +int vp10_bilateral_level_bits(const VP10_COMMON *const cm) { + return cm->frame_type == KEY_FRAME ? 
+ BILATERAL_LEVEL_BITS_KF : BILATERAL_LEVEL_BITS; +} + +int vp10_loop_bilateral_used(int level, int kf) { + const bilateral_params_t param = vp10_bilateral_level_to_params(level, kf); + return (param.sigma_x && param.sigma_y && param.sigma_r); +} + +void vp10_loop_bilateral_init(loop_filter_info_n *lfi, int level, int kf) { + lfi->bilateral_used = vp10_loop_bilateral_used(level, kf); + + if (lfi->bilateral_used) { + int i; + lfi->wr_lut = kf ? bilateral_filters_r_kf[level] : + bilateral_filters_r[level]; + for (i = 0; i < BILATERAL_WIN; i++) + lfi->wx_lut[i] = kf ? bilateral_filters_s_kf[level][i] : + bilateral_filters_s[level][i]; + } +} + +static int is_in_image(int x, int y, int width, int height) { + return (x >= 0 && x < width && y >= 0 && y < height); +} + +void loop_bilateral_filter(uint8_t *data, int width, int height, + int stride, loop_filter_info_n *lfi, + uint8_t *tmpdata, int tmpstride) { + int i, j; + const double *wr_lut_ = lfi->wr_lut + 256; + + uint8_t *data_p = data; + uint8_t *tmpdata_p = tmpdata; + for (i = 0; i < height; ++i) { + for (j = 0; j < width; ++j) { + int x, y; + double flsum = 0, wtsum = 0, wt; + uint8_t *data_p2 = data_p + j - BILATERAL_HALFWIN * stride; + for (y = -BILATERAL_HALFWIN; y <= BILATERAL_HALFWIN; ++y) { + for (x = -BILATERAL_HALFWIN; x <= BILATERAL_HALFWIN; ++x) { + if (!is_in_image(j + x, i + y, width, height)) + continue; + wt = lfi->wx_lut[y + BILATERAL_HALFWIN][x + BILATERAL_HALFWIN] * + wr_lut_[data_p2[x] - data_p[j]]; + wtsum += wt; + flsum += wt * data_p2[x]; + } + data_p2 += stride; + } + assert(wtsum > 0); + tmpdata_p[j] = clip_pixel((int)(flsum / wtsum + 0.5)); + } + tmpdata_p += tmpstride; + data_p += stride; + } + + for (i = 0; i < height; ++i) { + memcpy(data + i * stride, tmpdata + i * tmpstride, + width * sizeof(*data)); + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +void loop_bilateral_filter_highbd(uint8_t *data8, int width, int height, + int stride, loop_filter_info_n *lfi, + uint8_t *tmpdata8, int tmpstride, + 
int bit_depth) { + int i, j; + const double *wr_lut_ = lfi->wr_lut + 256; + + uint16_t *data = CONVERT_TO_SHORTPTR(data8); + uint16_t *tmpdata = CONVERT_TO_SHORTPTR(tmpdata8); + uint16_t *data_p = data; + uint16_t *tmpdata_p = tmpdata; + for (i = 0; i < height; ++i) { + for (j = 0; j < width; ++j) { + int x, y, diff_r; + double flsum = 0, wtsum = 0, wt; + uint16_t *data_p2 = data_p + j - BILATERAL_HALFWIN * stride; + + for (y = -BILATERAL_HALFWIN; y <= BILATERAL_HALFWIN; ++y) { + for (x = -BILATERAL_HALFWIN; x <= BILATERAL_HALFWIN; ++x) { + if (!is_in_image(j + x, i + y, width, height)) + continue; + + diff_r = (data_p2[x] - data_p[j]) >> (bit_depth - 8); + assert(diff_r >= -256 && diff_r <= 256); + + wt = lfi->wx_lut[y + BILATERAL_HALFWIN][x + BILATERAL_HALFWIN] * + wr_lut_[diff_r]; + wtsum += wt; + flsum += wt * data_p2[x]; + } + data_p2 += stride; + } + + assert(wtsum > 0); + tmpdata_p[j] = (int)(flsum / wtsum + 0.5); + } + tmpdata_p += tmpstride; + data_p += stride; + } + + for (i = 0; i < height; ++i) { + memcpy(data + i * stride, tmpdata + i * tmpstride, + width * sizeof(*data)); + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +void vp10_loop_bilateral_rows(YV12_BUFFER_CONFIG *frame, + VP10_COMMON *cm, + int start_mi_row, int end_mi_row, + int y_only) { + const int ywidth = frame->y_crop_width; + const int ystride = frame->y_stride; + const int uvwidth = frame->uv_crop_width; + const int uvstride = frame->uv_stride; + const int ystart = start_mi_row << MI_SIZE_LOG2; + const int uvstart = ystart >> cm->subsampling_y; + int yend = end_mi_row << MI_SIZE_LOG2; + int uvend = yend >> cm->subsampling_y; + YV12_BUFFER_CONFIG *tmp_buf; + yend = VPXMIN(yend, cm->height); + uvend = VPXMIN(uvend, cm->subsampling_y ? 
(cm->height + 1) >> 1 : cm->height); + + if (vpx_realloc_frame_buffer(&cm->tmp_loop_buf, cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth, +#endif + VP9_DEC_BORDER_IN_PIXELS, cm->byte_alignment, + NULL, NULL, NULL) < 0) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate tmp restoration buffer"); + + tmp_buf = &cm->tmp_loop_buf; + +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) + loop_bilateral_filter_highbd(frame->y_buffer + ystart * ystride, + ywidth, yend - ystart, ystride, &cm->lf_info, + tmp_buf->y_buffer + ystart * tmp_buf->y_stride, + tmp_buf->y_stride, cm->bit_depth); + else +#endif // CONFIG_VP9_HIGHBITDEPTH + loop_bilateral_filter(frame->y_buffer + ystart * ystride, + ywidth, yend - ystart, ystride, &cm->lf_info, + tmp_buf->y_buffer + ystart * tmp_buf->y_stride, + tmp_buf->y_stride); + if (!y_only) { +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + loop_bilateral_filter_highbd( + frame->u_buffer + uvstart * uvstride, + uvwidth, uvend - uvstart, uvstride, &cm->lf_info, + tmp_buf->u_buffer + uvstart * tmp_buf->uv_stride, + tmp_buf->uv_stride, cm->bit_depth); + loop_bilateral_filter_highbd( + frame->v_buffer + uvstart * uvstride, + uvwidth, uvend - uvstart, uvstride, &cm->lf_info, + tmp_buf->v_buffer + uvstart * tmp_buf->uv_stride, + tmp_buf->uv_stride, cm->bit_depth); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + loop_bilateral_filter(frame->u_buffer + uvstart * uvstride, + uvwidth, uvend - uvstart, uvstride, &cm->lf_info, + tmp_buf->u_buffer + uvstart * tmp_buf->uv_stride, + tmp_buf->uv_stride); + loop_bilateral_filter(frame->v_buffer + uvstart * uvstride, + uvwidth, uvend - uvstart, uvstride, &cm->lf_info, + tmp_buf->v_buffer + uvstart * tmp_buf->uv_stride, + tmp_buf->uv_stride); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + } +} +#endif // CONFIG_LOOP_RESTORATION + static void update_sharpness(loop_filter_info_n *lfi, 
int sharpness_lvl) { int lvl; @@ -252,6 +498,10 @@ void vp10_loop_filter_init(VP10_COMMON *cm) { // init hev threshold const vectors for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH); + +#if CONFIG_LOOP_RESTORATION + vp10_loop_bilateral_precal(); +#endif // CONFIG_LOOP_RESTORATION } void vp10_loop_filter_frame_init(VP10_COMMON *cm, int default_filt_lvl) { @@ -1721,6 +1971,30 @@ void vp10_loop_filter_data_reset( memcpy(lf_data->planes, planes, sizeof(lf_data->planes)); } +#if CONFIG_LOOP_RESTORATION +void vp10_loop_bilateral_frame(YV12_BUFFER_CONFIG *frame, + VP10_COMMON *cm, + int bilateral_level, + int y_only, int partial_frame) { + int start_mi_row, end_mi_row, mi_rows_to_filter; + // const int loop_bilateral_used = vp10_loop_bilateral_used( + // bilateral_level, cm->frame_type == KEY_FRAME); + vp10_loop_bilateral_init(&cm->lf_info, bilateral_level, + cm->frame_type == KEY_FRAME); + if (!cm->lf_info.bilateral_used) + return; + start_mi_row = 0; + mi_rows_to_filter = cm->mi_rows; + if (partial_frame && cm->mi_rows > 8) { + start_mi_row = cm->mi_rows >> 1; + start_mi_row &= 0xfffffff8; + mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); + } + end_mi_row = start_mi_row + mi_rows_to_filter; + vp10_loop_bilateral_rows(frame, cm, start_mi_row, end_mi_row, y_only); +} +#endif // CONFIG_LOOP_RESTORATION + int vp10_loop_filter_worker(LFWorkerData *const lf_data, void *unused) { (void)unused; vp10_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, diff --git a/vp10/common/loopfilter.h b/vp10/common/loopfilter.h index 3d764394e..02bcb26e9 100644 --- a/vp10/common/loopfilter.h +++ b/vp10/common/loopfilter.h @@ -28,6 +28,69 @@ extern "C" { #define MAX_MODE_LF_DELTAS 2 +#if CONFIG_LOOP_RESTORATION +#define BILATERAL_LEVEL_BITS_KF 4 +#define BILATERAL_LEVELS_KF (1 << BILATERAL_LEVEL_BITS_KF) +#define BILATERAL_LEVEL_BITS 3 +#define BILATERAL_LEVELS (1 << BILATERAL_LEVEL_BITS) +#define DEF_BILATERAL_LEVEL 2 + 
+#define BILATERAL_PRECISION 16 +#define BILATERAL_HALFWIN 3 +#define BILATERAL_WIN (2 * BILATERAL_HALFWIN + 1) + +typedef struct bilateral_params { + int sigma_x; // spatial variance x + int sigma_y; // spatial variance y + int sigma_r; // range variance +} bilateral_params_t; + +static bilateral_params_t + bilateral_level_to_params_arr[BILATERAL_LEVELS + 1] = { + // Values are rounded to 1/16 th precision + {0, 0, 0}, // 0 - default + {8, 9, 30}, + {9, 8, 30}, + {9, 11, 32}, + {11, 9, 32}, + {14, 14, 32}, + {18, 18, 36}, + {24, 24, 40}, + {32, 32, 40}, +}; + +static bilateral_params_t + bilateral_level_to_params_arr_kf[BILATERAL_LEVELS_KF + 1] = { + // Values are rounded to 1/16 th precision + {0, 0, 0}, // 0 - default + {8, 8, 30}, + {9, 9, 32}, + {10, 10, 32}, + {12, 12, 32}, + {14, 14, 32}, + {18, 18, 36}, + {24, 24, 40}, + {30, 30, 44}, + {36, 36, 48}, + {42, 42, 48}, + {48, 48, 48}, + {48, 48, 56}, + {56, 56, 48}, + {56, 56, 56}, + {56, 56, 64}, + {64, 64, 48}, +}; + +int vp10_bilateral_level_bits(const struct VP10Common *const cm); +int vp10_loop_bilateral_used(int level, int kf); + +static INLINE bilateral_params_t vp10_bilateral_level_to_params( + int index, int kf) { + return kf ? 
bilateral_level_to_params_arr_kf[index] : + bilateral_level_to_params_arr[index]; +} +#endif // CONFIG_LOOP_RESTORATION + enum lf_path { LF_PATH_420, LF_PATH_444, @@ -51,6 +114,11 @@ struct loopfilter { // 0 = ZERO_MV, MV signed char mode_deltas[MAX_MODE_LF_DELTAS]; signed char last_mode_deltas[MAX_MODE_LF_DELTAS]; + +#if CONFIG_LOOP_RESTORATION + int bilateral_level; + int last_bilateral_level; +#endif // CONFIG_LOOP_RESTORATION }; // Need to align this structure so when it is declared and @@ -64,6 +132,14 @@ typedef struct { typedef struct { loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1]; uint8_t lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS]; +#if CONFIG_LOOP_RESTORATION + double * wx_lut[BILATERAL_WIN]; + double * wr_lut; + int bilateral_sigma_x_set; + int bilateral_sigma_y_set; + int bilateral_sigma_r_set; + int bilateral_used; +#endif // CONFIG_LOOP_RESTORATION } loop_filter_info_n; // This structure holds bit masks for all 8x8 blocks in a 64x64 region. @@ -133,6 +209,24 @@ void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, struct macroblockd_plane planes[MAX_MB_PLANE], int start, int stop, int y_only); +#if CONFIG_LOOP_RESTORATION +void vp10_loop_bilateral_frame(YV12_BUFFER_CONFIG *frame, + struct VP10Common *cm, + int bilateral_level, + int y_only, int partial_frame); +void vp10_loop_filter_bilateral_frame(YV12_BUFFER_CONFIG *frame, + struct VP10Common *cm, + struct macroblockd *mbd, + int frame_filter_level, + int bilateral_level, + int y_only, int partial_frame); +void vp10_loop_bilateral_init(loop_filter_info_n *lfi, int T, int kf); +void vp10_loop_bilateral_rows(YV12_BUFFER_CONFIG *frame, + struct VP10Common *cm, + int start_mi_row, int end_mi_row, + int y_only); +#endif // CONFIG_LOOP_RESTORATION + typedef struct LoopFilterWorkerData { YV12_BUFFER_CONFIG *frame_buffer; struct VP10Common *cm; diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h index 9b7a729be..23a20d439 100644 --- a/vp10/common/onyxc_int.h +++ 
b/vp10/common/onyxc_int.h @@ -172,6 +172,9 @@ typedef struct VP10Common { YV12_BUFFER_CONFIG post_proc_buffer; YV12_BUFFER_CONFIG post_proc_buffer_int; #endif +#if CONFIG_LOOP_RESTORATION + YV12_BUFFER_CONFIG tmp_loop_buf; +#endif // CONFIG_LOOP_RESTORATION FRAME_TYPE last_frame_type; /* last frame's frame type for motion search.*/ #if CONFIG_EXT_REFS diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c index 10fdb54f8..33e8332ea 100644 --- a/vp10/decoder/decodeframe.c +++ b/vp10/decoder/decodeframe.c @@ -2102,8 +2102,9 @@ static void setup_segmentation(VP10_COMMON *const cm, } } -static void setup_loopfilter(struct loopfilter *lf, +static void setup_loopfilter(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) { + struct loopfilter *lf = &cm->lf; lf->filter_level = vpx_rb_read_literal(rb, 6); lf->sharpness_level = vpx_rb_read_literal(rb, 3); @@ -2126,6 +2127,19 @@ static void setup_loopfilter(struct loopfilter *lf, lf->mode_deltas[i] = vpx_rb_read_inv_signed_literal(rb, 6); } } +#if CONFIG_LOOP_RESTORATION + lf->bilateral_level = vpx_rb_read_bit(rb); + if (lf->bilateral_level) { + int level = vpx_rb_read_literal(rb, vp10_bilateral_level_bits(cm)); + lf->bilateral_level = level + (level >= lf->last_bilateral_level); + } else { + lf->bilateral_level = lf->last_bilateral_level; + } + if (cm->frame_type != KEY_FRAME) + cm->lf.last_bilateral_level = cm->lf.bilateral_level; + else + cm->lf.last_bilateral_level = 0; +#endif // CONFIG_LOOP_RESTORATION } static INLINE int read_delta_q(struct vpx_read_bit_buffer *rb) { @@ -3096,7 +3110,7 @@ static size_t read_uncompressed_header(VP10Decoder *pbi, if (frame_is_intra_only(cm) || cm->error_resilient_mode) vp10_setup_past_independence(cm); - setup_loopfilter(&cm->lf, rb); + setup_loopfilter(cm, rb); setup_quantization(cm, rb); #if CONFIG_VP9_HIGHBITDEPTH xd->bd = (int)cm->bit_depth; @@ -3445,6 +3459,13 @@ void vp10_decode_frame(VP10Decoder *pbi, } else { *p_data_end = decode_tiles(pbi, data + first_partition_size, 
data_end); } +#if CONFIG_LOOP_RESTORATION + vp10_loop_bilateral_init(&cm->lf_info, cm->lf.bilateral_level, + cm->frame_type == KEY_FRAME); + if (cm->lf_info.bilateral_used) { + vp10_loop_bilateral_rows(new_fb, cm, 0, cm->mi_rows, 0); + } +#endif // CONFIG_LOOP_RESTORATION if (!xd->corrupted) { if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) { diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c index 177dcc3f9..2e31d779a 100644 --- a/vp10/encoder/bitstream.c +++ b/vp10/encoder/bitstream.c @@ -1645,9 +1645,10 @@ static void update_coef_probs(VP10_COMP *cpi, vpx_writer* w) { } } -static void encode_loopfilter(struct loopfilter *lf, +static void encode_loopfilter(VP10_COMMON *cm, struct vpx_write_bit_buffer *wb) { int i; + struct loopfilter *lf = &cm->lf; // Encode the loop filter level and type vpx_wb_write_literal(wb, lf->filter_level, 6); @@ -1681,6 +1682,15 @@ static void encode_loopfilter(struct loopfilter *lf, } } } +#if CONFIG_LOOP_RESTORATION + vpx_wb_write_bit(wb, lf->bilateral_level != lf->last_bilateral_level); + if (lf->bilateral_level != lf->last_bilateral_level) { + int level = lf->bilateral_level - + (lf->bilateral_level > lf->last_bilateral_level); + vpx_wb_write_literal(wb, level, + vp10_bilateral_level_bits(cm)); + } +#endif // CONFIG_LOOP_RESTORATION } static void write_delta_q(struct vpx_write_bit_buffer *wb, int delta_q) { @@ -2139,7 +2149,7 @@ static void write_uncompressed_header(VP10_COMP *cpi, vpx_wb_write_literal(wb, cm->frame_context_idx, FRAME_CONTEXTS_LOG2); - encode_loopfilter(&cm->lf, wb); + encode_loopfilter(cm, wb); encode_quantization(cm, wb); encode_segmentation(cm, xd, wb); if (!cm->seg.enabled && xd->lossless[0]) diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c index 48d873d26..c9f5fe575 100644 --- a/vp10/encoder/encoder.c +++ b/vp10/encoder/encoder.c @@ -377,10 +377,16 @@ static void dealloc_compressor_data(VP10_COMP *cpi) { vp10_free_ref_frame_buffers(cm->buffer_pool); #if 
CONFIG_VP9_POSTPROC vp10_free_postproc_buffers(cm); -#endif +#endif // CONFIG_VP9_POSTPROC +#if CONFIG_LOOP_RESTORATION + vp10_free_restoration_buffers(cm); +#endif // CONFIG_LOOP_RESTORATION vp10_free_context_buffers(cm); vpx_free_frame_buffer(&cpi->last_frame_uf); +#if CONFIG_LOOP_RESTORATION + vpx_free_frame_buffer(&cpi->last_frame_db); +#endif // CONFIG_LOOP_RESTORATION vpx_free_frame_buffer(&cpi->scaled_source); vpx_free_frame_buffer(&cpi->scaled_last_source); vpx_free_frame_buffer(&cpi->alt_ref_buffer); @@ -634,6 +640,19 @@ static void alloc_util_frame_buffers(VP10_COMP *cpi) { vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate last frame buffer"); +#if CONFIG_LOOP_RESTORATION + if (vpx_realloc_frame_buffer(&cpi->last_frame_db, + cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth, +#endif + VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, + NULL, NULL, NULL)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate last frame deblocked buffer"); +#endif // CONFIG_LOOP_RESTORATION + if (vpx_realloc_frame_buffer(&cpi->scaled_source, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, @@ -2759,6 +2778,12 @@ static void loopfilter_frame(VP10_COMP *cpi, VP10_COMMON *cm) { vp10_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0); #endif } +#if CONFIG_LOOP_RESTORATION + vp10_loop_bilateral_init(&cm->lf_info, cm->lf.bilateral_level, + cm->frame_type == KEY_FRAME); + if (cm->lf_info.bilateral_used) + vp10_loop_bilateral_rows(cm->frame_to_show, cm, 0, cm->mi_rows, 0); +#endif // CONFIG_LOOP_RESTORATION vpx_extend_frame_inner_borders(cm->frame_to_show); } @@ -3867,6 +3892,12 @@ static void encode_frame_to_data_rate(VP10_COMP *cpi, cm->last2_frame_type = cm->last_frame_type; #endif // CONFIG_EXT_REFS cm->last_frame_type = cm->frame_type; +#if CONFIG_LOOP_RESTORATION + if (cm->frame_type != KEY_FRAME) + cm->lf.last_bilateral_level = 
cm->lf.bilateral_level; + else + cm->lf.last_bilateral_level = 0; +#endif // CONFIG_LOOP_RESTORATION vp10_rc_postencode_update(cpi, *size); diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h index 40bc4d760..797abacaf 100644 --- a/vp10/encoder/encoder.h +++ b/vp10/encoder/encoder.h @@ -338,6 +338,9 @@ typedef struct VP10_COMP { int ext_refresh_frame_context; YV12_BUFFER_CONFIG last_frame_uf; +#if CONFIG_LOOP_RESTORATION + YV12_BUFFER_CONFIG last_frame_db; +#endif // CONFIG_LOOP_RESTORATION TOKENEXTRA *tile_tok[4][1 << 6]; unsigned int tok_count[4][1 << 6]; diff --git a/vp10/encoder/picklpf.c b/vp10/encoder/picklpf.c index 1f5711df1..9bd1555cb 100644 --- a/vp10/encoder/picklpf.c +++ b/vp10/encoder/picklpf.c @@ -43,15 +43,16 @@ static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd, #if CONFIG_VAR_TX vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level, - 1, partial_frame); + 1, partial_frame); #else if (cpi->num_workers > 1) vp10_loop_filter_frame_mt(cm->frame_to_show, cm, cpi->td.mb.e_mbd.plane, - filt_level, 1, partial_frame, - cpi->workers, cpi->num_workers, &cpi->lf_row_sync); + filt_level, 1, partial_frame, + cpi->workers, cpi->num_workers, + &cpi->lf_row_sync); else vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level, - 1, partial_frame); + 1, partial_frame); #endif #if CONFIG_VP9_HIGHBITDEPTH @@ -70,6 +71,177 @@ static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd, return filt_err; } +#if CONFIG_LOOP_RESTORATION +#define JOINT_FILTER_BILATERAL_SEARCH +#define USE_RD_LOOP_POSTFILTER_SEARCH +static int try_bilateral_frame(const YV12_BUFFER_CONFIG *sd, + VP10_COMP *const cpi, + int bilateral_level, + int partial_frame) { + VP10_COMMON *const cm = &cpi->common; + int filt_err; + vp10_loop_bilateral_frame(cm->frame_to_show, cm, + bilateral_level, 1, partial_frame); +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + filt_err = vp10_highbd_get_y_sse(sd, cm->frame_to_show); + } else { + 
filt_err = vp10_get_y_sse(sd, cm->frame_to_show); + } +#else + filt_err = vp10_get_y_sse(sd, cm->frame_to_show); +#endif // CONFIG_VP9_HIGHBITDEPTH + + // Re-instate the unfiltered frame + vpx_yv12_copy_y(&cpi->last_frame_db, cm->frame_to_show); + return filt_err; +} + +static int search_bilateral_level(const YV12_BUFFER_CONFIG *sd, + VP10_COMP *cpi, + int filter_level, int partial_frame, + double *best_cost_ret) { + VP10_COMMON *const cm = &cpi->common; + int i, bilateral_best, err; + double best_cost; + double cost; + const int bilateral_level_bits = vp10_bilateral_level_bits(&cpi->common); + const int bilateral_levels = 1 << bilateral_level_bits; +#ifdef USE_RD_LOOP_POSTFILTER_SEARCH + MACROBLOCK *x = &cpi->td.mb; + int bits; +#endif + + // Make a copy of the unfiltered / processed recon buffer + vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf); + vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filter_level, + 1, partial_frame); + vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_db); + + bilateral_best = 0; + err = try_bilateral_frame(sd, cpi, 0, partial_frame); +#ifdef USE_RD_LOOP_POSTFILTER_SEARCH + bits = cm->lf.last_bilateral_level == 0 ? 0 : bilateral_level_bits; + cost = RDCOST_DBL(x->rdmult, x->rddiv, (bits << 2), err); +#else + cost = (double)err; +#endif // USE_RD_LOOP_POSTFILTER_SEARCH + best_cost = cost; + for (i = 1; i <= bilateral_levels; ++i) { + err = try_bilateral_frame(sd, cpi, i, partial_frame); +#ifdef USE_RD_LOOP_POSTFILTER_SEARCH + // Normally the rate is rate in bits * 256 and dist is sum sq err * 64 + // when RDCOST is used. However below we just scale both in the correct + // ratios appropriately but not exactly by these values. + bits = cm->lf.last_bilateral_level == i ? 
0 : bilateral_level_bits; + cost = RDCOST_DBL(x->rdmult, x->rddiv, (bits << 2), err); +#else + cost = (double)err; +#endif // USE_RD_LOOP_POSTFILTER_SEARCH + if (cost < best_cost) { + bilateral_best = i; + best_cost = cost; + } + } + if (best_cost_ret) *best_cost_ret = best_cost; + vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); + return bilateral_best; +} + +#ifdef JOINT_FILTER_BILATERAL_SEARCH +static int search_filter_bilateral_level(const YV12_BUFFER_CONFIG *sd, + VP10_COMP *cpi, + int partial_frame, + int *bilateral_level) { + const VP10_COMMON *const cm = &cpi->common; + const struct loopfilter *const lf = &cm->lf; + const int min_filter_level = 0; + const int max_filter_level = get_max_filter_level(cpi); + int filt_direction = 0; + int filt_best, bilateral_best; + double best_err; + int i; + + // Start the search at the previous frame filter level unless it is now out of + // range. + int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level); + int filter_step = filt_mid < 16 ? 4 : filt_mid / 4; + double ss_err[MAX_LOOP_FILTER + 1]; + int bilateral; + + // Set each entry to -1 + for (i = 0; i <= MAX_LOOP_FILTER; ++i) + ss_err[i] = -1.0; + + bilateral = search_bilateral_level(sd, cpi, filt_mid, + partial_frame, &best_err); + filt_best = filt_mid; + bilateral_best = bilateral; + ss_err[filt_mid] = best_err; + + while (filter_step > 0) { + const int filt_high = VPXMIN(filt_mid + filter_step, max_filter_level); + const int filt_low = VPXMAX(filt_mid - filter_step, min_filter_level); + + // Bias against raising loop filter in favor of lowering it. 
+ double bias = (best_err / (1 << (15 - (filt_mid / 8)))) * filter_step; + + if ((cpi->oxcf.pass == 2) && (cpi->twopass.section_intra_rating < 20)) + bias = (bias * cpi->twopass.section_intra_rating) / 20; + + // yx, bias less for large block size + if (cm->tx_mode != ONLY_4X4) + bias /= 2; + + if (filt_direction <= 0 && filt_low != filt_mid) { + // Get Low filter error score + if (ss_err[filt_low] < 0) { + bilateral = search_bilateral_level(sd, cpi, filt_low, + partial_frame, + &ss_err[filt_low]); + } + // If value is close to the best so far then bias towards a lower loop + // filter value. + if ((ss_err[filt_low] - bias) < best_err) { + // Was it actually better than the previous best? + if (ss_err[filt_low] < best_err) { + best_err = ss_err[filt_low]; + } + + filt_best = filt_low; + bilateral_best = bilateral; + } + } + + // Now look at filt_high + if (filt_direction >= 0 && filt_high != filt_mid) { + if (ss_err[filt_high] < 0) { + bilateral = search_bilateral_level(sd, cpi, filt_high, partial_frame, + &ss_err[filt_high]); + } + // Was it better than the previous best? + if (ss_err[filt_high] < (best_err - bias)) { + best_err = ss_err[filt_high]; + filt_best = filt_high; + bilateral_best = bilateral; + } + } + + // Half the step distance if the best filter value was the same as last time + if (filt_best == filt_mid) { + filter_step /= 2; + filt_direction = 0; + } else { + filt_direction = (filt_best < filt_mid) ? 
-1 : 1; + filt_mid = filt_best; + } + } + *bilateral_level = bilateral_best; + return filt_best; +} +#endif // JOINT_FILTER_BILATERAL_SEARCH +#endif // CONFIG_LOOP_RESTORATION + static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi, int partial_frame) { const VP10_COMMON *const cm = &cpi->common; @@ -191,8 +363,24 @@ void vp10_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi, if (cm->frame_type == KEY_FRAME) filt_guess -= 4; lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level); +#if CONFIG_LOOP_RESTORATION + lf->bilateral_level = search_bilateral_level( + sd, cpi, lf->filter_level, method == LPF_PICK_FROM_SUBIMAGE, NULL); +#endif // CONFIG_LOOP_RESTORATION } else { - lf->filter_level = search_filter_level(sd, cpi, - method == LPF_PICK_FROM_SUBIMAGE); +#if CONFIG_LOOP_RESTORATION +#ifdef JOINT_FILTER_BILATERAL_SEARCH + lf->filter_level = search_filter_bilateral_level( + sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, &lf->bilateral_level); +#else + lf->filter_level = search_filter_level( + sd, cpi, method == LPF_PICK_FROM_SUBIMAGE); + lf->bilateral_level = search_bilateral_level( + sd, cpi, lf->filter_level, method == LPF_PICK_FROM_SUBIMAGE, NULL); +#endif // JOINT_FILTER_BILATERAL_SEARCH +#else + lf->filter_level = search_filter_level( + sd, cpi, method == LPF_PICK_FROM_SUBIMAGE); +#endif // CONFIG_LOOP_RESTORATION } } diff --git a/vp10/encoder/rd.h b/vp10/encoder/rd.h index 42d8ea167..2b6106d95 100644 --- a/vp10/encoder/rd.h +++ b/vp10/encoder/rd.h @@ -26,6 +26,10 @@ extern "C" { #define RDCOST(RM, DM, R, D) \ (((128 + ((int64_t)R) * (RM)) >> 8) + (D << DM)) + +#define RDCOST_DBL(RM, DM, R, D) \ + (((((double)(R)) * (RM)) / 256.0) + ((double)(D) * (1 << (DM)))) + #define QIDX_SKIP_THRESH 115 #define MV_COST_WEIGHT 108 diff --git a/vp10/vp10_dx_iface.c b/vp10/vp10_dx_iface.c index 33337a4bd..a0a58e85b 100644 --- a/vp10/vp10_dx_iface.c +++ b/vp10/vp10_dx_iface.c @@ -122,6 +122,9 @@ static vpx_codec_err_t 
decoder_destroy(vpx_codec_alg_priv_t *ctx) { #if CONFIG_VP9_POSTPROC vp10_free_postproc_buffers(&frame_worker_data->pbi->common); #endif +#if CONFIG_LOOP_RESTORATION + vp10_free_restoration_buffers(&frame_worker_data->pbi->common); +#endif // CONFIG_LOOP_RESTORATION vp10_decoder_remove(frame_worker_data->pbi); vpx_free(frame_worker_data->scratch_buffer); #if CONFIG_MULTITHREAD -- 2.40.0