From ca6bfe2cd49c2b519744d6184153393707a92921 Mon Sep 17 00:00:00 2001
From: Jingning Han
Date: Fri, 10 Aug 2018 16:42:21 -0700
Subject: [PATCH] Unify the YUV plane temporal filter operation

Unify the temporal filter operations for the luma and chroma
components. Handle them in a single loop over the pixels in the
processing block.

Change-Id: I9ea1946f3a6fb37da6867aa78140d45cad0facf0
---
 vp9/encoder/vp9_temporal_filter.c | 176 ++++++++++++++++++++++++++----
 1 file changed, 154 insertions(+), 22 deletions(-)

diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 627a1c7b5..c0ecbb7b1 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -94,6 +94,146 @@ void vp9_temporal_filter_init(void) {
   for (i = 1; i < 512; ++i) fixed_divide[i] = 0x80000 / i;
 }
 
+// Maps a summed squared difference to a filter weight: the sum is scaled by
+// 3 and divided by |index| (the number of samples that contributed to it),
+// |rounding| is added, and the result is shifted right by |strength| and
+// capped at 16. Returns (16 - capped value) * filter_weight, so a larger
+// difference yields a smaller weight. |index| must be non-zero.
+static int mod_index(int sum_dist, int index, int rounding, int strength,
+                     int filter_weight) {
+  int mod = (sum_dist * 3) / index;
+  mod += rounding;
+  mod >>= strength;
+
+  mod = VPXMIN(16, mod);
+
+  mod = 16 - mod;
+  mod *= filter_weight;
+
+  return mod;
+}
+
+// Accumulates the weighted Y, U and V predictor samples of one block in a
+// single pass over the luma pixels. For each sample, the squared differences
+// between |*_frame1| and |*_pred| are summed over the 3x3 neighborhood
+// clipped to the block, mod_index() turns that sum into a weight, and the
+// weight and weight * predictor value are added to the matching count and
+// accumulator entries.
+//
+// The count and accumulator arrays are indexed densely in raster order (no
+// stride). Chroma is handled at luma positions where (i & ss_y) and
+// (j & ss_x) are both zero, which visits each chroma sample once when the
+// subsampling shifts are 0 or 1.
+static void apply_temporal_filter(
+    const uint8_t *y_frame1, int y_stride, const uint8_t *y_pred,
+    int y_buf_stride, const uint8_t *u_frame1, const uint8_t *v_frame1,
+    int uv_stride, const uint8_t *u_pred, const uint8_t *v_pred,
+    int uv_buf_stride, unsigned int block_width, unsigned int block_height,
+    int ss_x, int ss_y, int strength, int filter_weight,
+    uint32_t *y_accumulator, uint16_t *y_count, uint32_t *u_accumulator,
+    uint16_t *u_count, uint32_t *v_accumulator, uint16_t *v_count) {
+  unsigned int i, j, k, m;
+  int modifier;
+  const int rounding = (1 << strength) >> 1;
+  const int uv_block_width = block_width >> ss_x;
+  const int uv_block_height = block_height >> ss_y;
+
+  assert(strength >= 0);
+  assert(strength <= 6);
+
+  assert(filter_weight >= 0);
+  assert(filter_weight <= 2);
+
+  for (i = 0, k = 0, m = 0; i < block_height; i++) {
+    for (j = 0; j < block_width; j++) {
+      const int pixel_value = y_pred[i * y_buf_stride + j];
+
+      // non-local mean approach
+      int diff_sse[9] = { 0 };
+      int idx, idy, index = 0;
+
+      for (idy = -1; idy <= 1; ++idy) {
+        for (idx = -1; idx <= 1; ++idx) {
+          const int row = (int)i + idy;
+          const int col = (int)j + idx;
+
+          if (row >= 0 && row < (int)block_height && col >= 0 &&
+              col < (int)block_width) {
+            const int diff = y_frame1[row * (int)y_stride + col] -
+                             y_pred[row * y_buf_stride + col];
+            diff_sse[index] = diff * diff;
+            ++index;
+          }
+        }
+      }
+
+      assert(index > 0);
+
+      modifier = 0;
+      for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx];
+
+      modifier = mod_index(modifier, index, rounding, strength, filter_weight);
+
+      y_count[k] += modifier;
+      y_accumulator[k] += modifier * pixel_value;
+
+      ++k;
+
+      // Process chroma component
+      if (!(i & ss_y) && !(j & ss_x)) {
+        const int uv_r = i >> ss_y;
+        const int uv_c = j >> ss_x;
+
+        const int u_pixel_value = u_pred[uv_r * uv_buf_stride + uv_c];
+        const int v_pixel_value = v_pred[uv_r * uv_buf_stride + uv_c];
+
+        // non-local mean approach
+        int u_diff_sse[9] = { 0 };
+        int v_diff_sse[9] = { 0 };
+        int idx, idy, index = 0;
+        int u_mod = 0, v_mod = 0;
+
+        for (idy = -1; idy <= 1; ++idy) {
+          for (idx = -1; idx <= 1; ++idx) {
+            const int row = uv_r + idy;
+            const int col = uv_c + idx;
+
+            if (row >= 0 && row < uv_block_height && col >= 0 &&
+                col < uv_block_width) {
+              int diff = u_frame1[row * uv_stride + col] -
+                         u_pred[row * uv_buf_stride + col];
+              u_diff_sse[index] = diff * diff;
+
+              diff = v_frame1[row * uv_stride + col] -
+                     v_pred[row * uv_buf_stride + col];
+              v_diff_sse[index] = diff * diff;
+
+              ++index;
+            }
+          }
+        }
+
+        assert(index > 0);
+
+        for (idx = 0; idx < 9; ++idx) {
+          u_mod += u_diff_sse[idx];
+          v_mod += v_diff_sse[idx];
+        }
+
+        u_mod = mod_index(u_mod, index, rounding, strength, filter_weight);
+        v_mod = mod_index(v_mod, index, rounding, strength, filter_weight);
+
+        u_count[m] += u_mod;
+        u_accumulator[m] += u_mod * u_pixel_value;
+        v_count[m] += v_mod;
+        v_accumulator[m] += v_mod * v_pixel_value;
+
+        ++m;
+      }  // Complete YUV pixel
+    }
+  }
+}
+
 void vp9_temporal_filter_apply_c(const uint8_t *frame1, unsigned int stride,
                                  const uint8_t *frame2,
                                  unsigned int block_width,
@@ -421,31 +561,23 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
               accumulator + 512, count + 512);
         } else {
           // Apply the filter (YUV)
-          vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
-                                    predictor, 16, 16, strength, filter_weight,
-                                    accumulator, count);
-          vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
-                                    predictor + 256, mb_uv_width, mb_uv_height,
-                                    strength, filter_weight, accumulator + 256,
-                                    count + 256);
-          vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
-                                    predictor + 512, mb_uv_width, mb_uv_height,
-                                    strength, filter_weight, accumulator + 512,
-                                    count + 512);
+          apply_temporal_filter(
+              f->y_buffer + mb_y_offset, f->y_stride, predictor, 16,
+              f->u_buffer + mb_uv_offset, f->v_buffer + mb_uv_offset,
+              f->uv_stride, predictor + 256, predictor + 512, mb_uv_width, 16,
+              16, mbd->plane[1].subsampling_x, mbd->plane[1].subsampling_y,
+              strength, filter_weight, accumulator, count, accumulator + 256,
+              count + 256, accumulator + 512, count + 512);
         }
 #else
         // Apply the filter (YUV)
-        vp9_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
-                                    predictor, 16, 16, strength, filter_weight,
-                                    accumulator, count);
-        vp9_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, f->uv_stride,
-                                    predictor + 256, mb_uv_width, mb_uv_height,
-                                    strength, filter_weight, accumulator + 256,
-                                    count + 256);
-        vp9_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, f->uv_stride,
-                                    predictor + 512, mb_uv_width, mb_uv_height,
-                                    strength, filter_weight, accumulator + 512,
-                                    count + 512);
+        apply_temporal_filter(
+            f->y_buffer + mb_y_offset, f->y_stride, predictor, 16,
+            f->u_buffer + mb_uv_offset, f->v_buffer + mb_uv_offset,
+            f->uv_stride, predictor + 256, predictor + 512, mb_uv_width, 16, 16,
+            mbd->plane[1].subsampling_x, mbd->plane[1].subsampling_y, strength,
+            filter_weight, accumulator, count, accumulator + 256, count + 256,
+            accumulator + 512, count + 512);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
       }
     }
-- 
2.40.0