From 7ac3e3c1d68a8e39b9195f5b188ccdaa9340b5b7 Mon Sep 17 00:00:00 2001 From: JackyChen Date: Tue, 2 Dec 2014 12:14:47 -0800 Subject: [PATCH] Multiframe Quality Enhancement(MFQE) in VP9. This is the first version of MFQE in VP9. There are a few TODOs included in this version. Usage: Add flag --enable-vp9-postproc to configure the project. In the decoder, pass flag --mfqe on the command line to enable MFQE in postproc. Note: A low-quality key frame is needed to see the effect of this new patch. In my experiment, I fixed the qindex of the key frame to 200. Change-Id: I021f9ce4616ed3574c81e48d968662994b56a396 --- vp9/common/vp9_alloccommon.c | 1 + vp9/common/vp9_mfqe.c | 314 +++++++++++++++++++++++++++++++++++ vp9/common/vp9_mfqe.h | 31 ++++ vp9/common/vp9_onyxc_int.h | 1 + vp9/common/vp9_postproc.c | 76 ++++++++- vp9/common/vp9_postproc.h | 8 + vp9/common/vp9_ppflags.h | 3 +- vp9/vp9_common.mk | 2 + 8 files changed, 434 insertions(+), 2 deletions(-) create mode 100644 vp9/common/vp9_mfqe.c create mode 100644 vp9/common/vp9_mfqe.h diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c index 284d3a2b5..cb299f9f7 100644 --- a/vp9/common/vp9_alloccommon.c +++ b/vp9/common/vp9_alloccommon.c @@ -45,6 +45,7 @@ void vp9_free_ref_frame_buffers(VP9_COMMON *cm) { } vp9_free_frame_buffer(&cm->post_proc_buffer); + vp9_free_frame_buffer(&cm->post_proc_buffer_int); } void vp9_free_context_buffers(VP9_COMMON *cm) { diff --git a/vp9/common/vp9_mfqe.c b/vp9/common/vp9_mfqe.c new file mode 100644 index 000000000..f1bdc1b06 --- /dev/null +++ b/vp9/common/vp9_mfqe.c @@ -0,0 +1,314 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_config.h" +#include "./vpx_scale_rtcd.h" +#include "./vp9_rtcd.h" + +#include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_postproc.h" + +// TODO(jackychen): Replace this function with SSE2 code. There is +// one SSE2 implementation in vp8, so will consider how to share it +// between vp8 and vp9. +static void filter_by_weight(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + int block_size, int src_weight) { + const int dst_weight = (1 << MFQE_PRECISION) - src_weight; + const int rounding_bit = 1 << (MFQE_PRECISION - 1); + int r, c; + + for (r = 0; r < block_size; r++) { + for (c = 0; c < block_size; c++) { + dst[c] = (src[c] * src_weight + dst[c] * dst_weight + rounding_bit) + >> MFQE_PRECISION; + } + src += src_stride; + dst += dst_stride; + } +} + +static void filter_by_weight32x32(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, int weight) { + filter_by_weight(src, src_stride, dst, dst_stride, 16, weight); + filter_by_weight(src + 16, src_stride, dst + 16, dst_stride, 16, weight); + filter_by_weight(src + src_stride * 16, src_stride, dst + dst_stride * 16, + dst_stride, 16, weight); + filter_by_weight(src + src_stride * 16 + 16, src_stride, + dst + dst_stride * 16 + 16, dst_stride, 16, weight); +} + +static void filter_by_weight64x64(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, int weight) { + filter_by_weight32x32(src, src_stride, dst, dst_stride, weight); + filter_by_weight32x32(src + 32, src_stride, dst + 32, + dst_stride, weight); + filter_by_weight32x32(src + src_stride * 32, src_stride, + dst + dst_stride * 32, dst_stride, weight); + filter_by_weight32x32(src + src_stride * 32 + 32, src_stride, + dst + dst_stride * 32 + 32, dst_stride, weight); +} + +static void apply_ifactor(const uint8_t *y, int y_stride, uint8_t *yd, + int yd_stride, const uint8_t *u, 
const uint8_t *v, + int uv_stride, uint8_t *ud, uint8_t *vd, + int uvd_stride, BLOCK_SIZE block_size, + int weight) { + if (block_size == BLOCK_16X16) { + filter_by_weight(y, y_stride, yd, yd_stride, 16, weight); + filter_by_weight(u, uv_stride, ud, uvd_stride, 8, weight); + filter_by_weight(v, uv_stride, vd, uvd_stride, 8, weight); + } else if (block_size == BLOCK_32X32) { + filter_by_weight32x32(y, y_stride, yd, yd_stride, weight); + filter_by_weight(u, uv_stride, ud, uvd_stride, 16, weight); + filter_by_weight(v, uv_stride, vd, uvd_stride, 16, weight); + } else if (block_size == BLOCK_64X64) { + filter_by_weight64x64(y, y_stride, yd, yd_stride, weight); + filter_by_weight32x32(u, uv_stride, ud, uvd_stride, weight); + filter_by_weight32x32(v, uv_stride, vd, uvd_stride, weight); + } +} + +// TODO(jackychen): Determine whether replace it with assembly code. +static void copy_mem8x8(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride) { + int r; + for (r = 0; r < 8; r++) { + memcpy(dst, src, 8); + src += src_stride; + dst += dst_stride; + } +} + +static void copy_mem16x16(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride) { + int r; + for (r = 0; r < 16; r++) { + memcpy(dst, src, 16); + src += src_stride; + dst += dst_stride; + } +} + +static void copy_mem32x32(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride) { + copy_mem16x16(src, src_stride, dst, dst_stride); + copy_mem16x16(src + 16, src_stride, dst + 16, dst_stride); + copy_mem16x16(src + src_stride * 16, src_stride, + dst + dst_stride * 16, dst_stride); + copy_mem16x16(src + src_stride * 16 + 16, src_stride, + dst + dst_stride * 16 + 16, dst_stride); +} + +void copy_mem64x64(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride) { + copy_mem32x32(src, src_stride, dst, dst_stride); + copy_mem32x32(src + 32, src_stride, dst + 32, dst_stride); + copy_mem32x32(src + src_stride * 32, src_stride, + dst + src_stride * 32, dst_stride); + copy_mem32x32(src 
+ src_stride * 32 + 32, src_stride, + dst + src_stride * 32 + 32, dst_stride); +} + +static void copy_block(const uint8_t *y, const uint8_t *u, const uint8_t *v, + int y_stride, int uv_stride, uint8_t *yd, uint8_t *ud, + uint8_t *vd, int yd_stride, int uvd_stride, + BLOCK_SIZE bs) { + if (bs == BLOCK_16X16) { + copy_mem16x16(y, y_stride, yd, yd_stride); + copy_mem8x8(u, uv_stride, ud, uvd_stride); + copy_mem8x8(v, uv_stride, vd, uvd_stride); + } else if (bs == BLOCK_32X32) { + copy_mem32x32(y, y_stride, yd, yd_stride); + copy_mem16x16(u, uv_stride, ud, uvd_stride); + copy_mem16x16(v, uv_stride, vd, uvd_stride); + } else { + copy_mem64x64(y, y_stride, yd, yd_stride); + copy_mem32x32(u, uv_stride, ud, uvd_stride); + copy_mem32x32(v, uv_stride, vd, uvd_stride); + } +} + +static void mfqe_block(BLOCK_SIZE bs, const uint8_t *y, const uint8_t *u, + const uint8_t *v, int y_stride, int uv_stride, + uint8_t *yd, uint8_t *ud, uint8_t *vd, + int yd_stride, int uvd_stride) { + int sad, sad_thr, vdiff; + uint32_t sse; + + if (bs == BLOCK_16X16) { + vdiff = (vp9_variance16x16(y, y_stride, yd, yd_stride, &sse) + 128) >> 8; + sad = (vp9_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8; + } else if (bs == BLOCK_32X32) { + vdiff = (vp9_variance32x32(y, y_stride, yd, yd_stride, &sse) + 512) >> 10; + sad = (vp9_sad32x32(y, y_stride, yd, yd_stride) + 512) >> 10; + } else /* if (bs == BLOCK_64X64) */ { + vdiff = (vp9_variance64x64(y, y_stride, yd, yd_stride, &sse) + 2048) >> 12; + sad = (vp9_sad64x64(y, y_stride, yd, yd_stride) + 2048) >> 12; + } + + if (bs == BLOCK_16X16) { + sad_thr = 8; + } else if (bs == BLOCK_32X32) { + sad_thr = 7; + } else { // BLOCK_64X64 + sad_thr = 6; + } + + // TODO(jackychen): More experiments and remove magic numbers. + // vdiff > sad * 3 means vdiff should not be too small, otherwise, + // it might be a lighting change in smooth area. When there is a + // lighting change in smooth area, it is dangerous to do MFQE. 
+ if (sad > 1 && sad < sad_thr && vdiff > sad * 3 && vdiff < 150) { + // TODO(jackychen): Add weighted average in the calculation. + // Currently, the data is copied from last frame without averaging. + apply_ifactor(y, y_stride, yd, yd_stride, u, v, uv_stride, + ud, vd, uvd_stride, bs, 0); + } else { + // Copy the block from current frame (i.e., no mfqe is done). + copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd, + yd_stride, uvd_stride, bs); + } +} + +static int mfqe_decision(MODE_INFO *mi, BLOCK_SIZE cur_bs) { + // Check the motion in current block (for inter frame), + // or check the motion in the correlated block in last frame (for keyframe). + const int mv_len_square = mi->mbmi.mv[0].as_mv.row * + mi->mbmi.mv[0].as_mv.row + + mi->mbmi.mv[0].as_mv.col * + mi->mbmi.mv[0].as_mv.col; + const int mv_threshold = 100; + return mi->mbmi.mode >= NEARESTMV && // Not an intra block + cur_bs >= BLOCK_16X16 && + mv_len_square <= mv_threshold; +} + +// Process each partition in a super block, recursively. +static void mfqe_partition(VP9_COMMON *cm, MODE_INFO *mi, BLOCK_SIZE bs, + const uint8_t *y, const uint8_t *u, + const uint8_t *v, int y_stride, int uv_stride, + uint8_t *yd, uint8_t *ud, uint8_t *vd, + int yd_stride, int uvd_stride) { + int mi_offset, y_offset, uv_offset; + const BLOCK_SIZE cur_bs = mi->mbmi.sb_type; + // TODO(jackychen): Consider how and whether to use qdiff in MFQE. + // int qdiff = cm->base_qindex - cm->postproc_state.last_base_qindex; + const int bsl = b_width_log2_lookup[bs]; + PARTITION_TYPE partition = partition_lookup[bsl][cur_bs]; + const BLOCK_SIZE subsize = get_subsize(bs, partition); + + if (cur_bs < BLOCK_8X8) { + // If there are blocks smaller than 8x8, it must be on the boundary. + return; + } + // No MFQE on blocks smaller than 16x16 + if (partition == PARTITION_SPLIT && bs == BLOCK_16X16) { + partition = PARTITION_NONE; + } + switch (partition) { + case PARTITION_HORZ: + case PARTITION_VERT: + // If current block size is not square. 
+ // Copy the block from current frame(i.e., no mfqe is done). + // TODO(jackychen): Rectangle blocks should also be taken into account. + copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd, + yd_stride, uvd_stride, bs); + break; + case PARTITION_NONE: + if (mfqe_decision(mi, cur_bs)) { + // Do mfqe on this partition. + mfqe_block(cur_bs, y, u, v, y_stride, uv_stride, + yd, ud, vd, yd_stride, uvd_stride); + } else { + // Copy the block from current frame(i.e., no mfqe is done). + copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd, + yd_stride, uvd_stride, bs); + } + break; + case PARTITION_SPLIT: + if (bs == BLOCK_64X64) { + mi_offset = 4; + y_offset = 32; + uv_offset = 16; + } else { + mi_offset = 2; + y_offset = 16; + uv_offset = 8; + } + // Recursion on four square partitions, e.g. if bs is 64X64, + // then look into four 32X32 blocks in it. + mfqe_partition(cm, mi, subsize, y, u, v, y_stride, uv_stride, yd, ud, vd, + yd_stride, uvd_stride); + mfqe_partition(cm, mi + mi_offset, subsize, y + y_offset, u + uv_offset, + v + uv_offset, y_stride, uv_stride, yd + y_offset, + ud + uv_offset, vd + uv_offset, yd_stride, uvd_stride); + mfqe_partition(cm, mi + mi_offset * cm->mi_stride, subsize, + y + y_offset * y_stride, u + uv_offset * uv_stride, + v + uv_offset * uv_stride, y_stride, uv_stride, + yd + y_offset * yd_stride, ud + uv_offset * uvd_stride, + vd + uv_offset * uvd_stride, yd_stride, uvd_stride); + mfqe_partition(cm, mi + mi_offset * cm->mi_stride + mi_offset, + subsize, y + y_offset * y_stride + y_offset, + u + uv_offset * uv_stride + uv_offset, + v + uv_offset * uv_stride + uv_offset, y_stride, + uv_stride, yd + y_offset * yd_stride + y_offset, + ud + uv_offset * uvd_stride + uv_offset, + vd + uv_offset * uvd_stride + uv_offset, + yd_stride, uvd_stride); + break; + default: + assert(0); + } +} + +void vp9_mfqe(VP9_COMMON *cm) { + int mi_row, mi_col; + // Current decoded frame. 
+ const YV12_BUFFER_CONFIG *show = cm->frame_to_show; + // Last decoded frame and will store the MFQE result. + YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer; + // Loop through each super block. + for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) { + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { + MODE_INFO *mi; + MODE_INFO *mi_local = cm->mi + (mi_row * cm->mi_stride + mi_col); + // Motion Info in last frame. + MODE_INFO *mi_prev = cm->postproc_state.prev_mi + + (mi_row * cm->mi_stride + mi_col); + const uint32_t y_stride = show->y_stride; + const uint32_t uv_stride = show->uv_stride; + const uint32_t yd_stride = dest->y_stride; + const uint32_t uvd_stride = dest->uv_stride; + const uint32_t row_offset_y = mi_row << 3; + const uint32_t row_offset_uv = mi_row << 2; + const uint32_t col_offset_y = mi_col << 3; + const uint32_t col_offset_uv = mi_col << 2; + const uint8_t *y = show->y_buffer + row_offset_y * y_stride + + col_offset_y; + const uint8_t *u = show->u_buffer + row_offset_uv * uv_stride + + col_offset_uv; + const uint8_t *v = show->v_buffer + row_offset_uv * uv_stride + + col_offset_uv; + uint8_t *yd = dest->y_buffer + row_offset_y * yd_stride + col_offset_y; + uint8_t *ud = dest->u_buffer + row_offset_uv * uvd_stride + + col_offset_uv; + uint8_t *vd = dest->v_buffer + row_offset_uv * uvd_stride + + col_offset_uv; + if (frame_is_intra_only(cm)) { + mi = mi_prev; + } else { + mi = mi_local; + } + mfqe_partition(cm, mi, BLOCK_64X64, y, u, v, y_stride, uv_stride, yd, ud, + vd, yd_stride, uvd_stride); + } + } +} diff --git a/vp9/common/vp9_mfqe.h b/vp9/common/vp9_mfqe.h new file mode 100644 index 000000000..dfff8c23d --- /dev/null +++ b/vp9/common/vp9_mfqe.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_MFQE_H_ +#define VP9_COMMON_VP9_MFQE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +// Multiframe Quality Enhancement. +// The aim for MFQE is to replace pixel blocks in the current frame with +// the correlated pixel blocks (with higher quality) in the last frame. +// The replacement can only be taken in stationary blocks by checking +// the motion of the blocks and other conditions such as the SAD of +// the current block and correlated block, the variance of the block +// difference, etc. +void vp9_mfqe(struct VP9Common *cm); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_MFQE_H_ diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index ae69c0c00..98c7e2a6e 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -113,6 +113,7 @@ typedef struct VP9Common { int new_fb_idx; YV12_BUFFER_CONFIG post_proc_buffer; + YV12_BUFFER_CONFIG post_proc_buffer_int; FRAME_TYPE last_frame_type; /* last frame's frame type for motion search.*/ FRAME_TYPE frame_type; diff --git a/vp9/common/vp9_postproc.c b/vp9/common/vp9_postproc.c index 575ffbc30..e1a389132 100644 --- a/vp9/common/vp9_postproc.c +++ b/vp9/common/vp9_postproc.c @@ -79,6 +79,9 @@ const short vp9_rv[] = { 0, 9, 5, 5, 11, 10, 13, 9, 10, 13, }; +static const uint8_t q_diff_thresh = 20; +static const uint8_t last_q_thresh = 170; + void vp9_post_proc_down_and_across_c(const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, @@ -616,6 +619,17 @@ void vp9_plane_add_noise_c(uint8_t *start, char *noise, } } +static void swap_mi_and_prev_mi(VP9_COMMON *cm) { + // Current mip will be the prev_mip for the next frame. 
+ MODE_INFO *temp = cm->postproc_state.prev_mip; + cm->postproc_state.prev_mip = cm->mip; + cm->mip = temp; + + // Update the upper left visible macroblock ptrs. + cm->mi = cm->mip + cm->mi_stride + 1; + cm->postproc_state.prev_mi = cm->postproc_state.prev_mip + cm->mi_stride + 1; +} + int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest, vp9_ppflags_t *ppflags) { const int q = MIN(63, cm->lf.filter_level * 10 / 6); @@ -633,6 +647,42 @@ int vp9_post_proc_frame(struct VP9Common *cm, vp9_clear_system_state(); + // Alloc memory for prev_mip in the first frame. + if (cm->current_video_frame == 1) { + cm->postproc_state.last_base_qindex = cm->base_qindex; + cm->postproc_state.last_frame_valid = 1; + ppstate->prev_mip = vpx_calloc(cm->mi_alloc_size, sizeof(*cm->mip)); + if (!ppstate->prev_mip) { + return 1; + } + ppstate->prev_mi = ppstate->prev_mip + cm->mi_stride + 1; + vpx_memset(ppstate->prev_mip, 0, + cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip)); + } + + // Allocate post_proc_buffer_int if needed. + if ((flags & VP9D_MFQE) && !cm->post_proc_buffer_int.buffer_alloc) { + if ((flags & VP9D_DEMACROBLOCK) || (flags & VP9D_DEBLOCK)) { + const int width = ALIGN_POWER_OF_TWO(cm->width, 4); + const int height = ALIGN_POWER_OF_TWO(cm->height, 4); + + if (vp9_alloc_frame_buffer(&cm->post_proc_buffer_int, width, height, + cm->subsampling_x, cm->subsampling_y, +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth, +#endif // CONFIG_VP9_HIGHBITDEPTH + VP9_ENC_BORDER_IN_PIXELS) < 0) { + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate MFQE framebuffer"); + } + + // Fill the buffer with a fixed value (128, not 0) so that post proc + // doesn't pull random data in from edge. 
+ vpx_memset(cm->post_proc_buffer_int.buffer_alloc, 128, + cm->post_proc_buffer.frame_size); + } + } + #if CONFIG_VP9_POSTPROC || CONFIG_INTERNAL_STATS if (vp9_realloc_frame_buffer(&cm->post_proc_buffer, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, @@ -644,7 +694,27 @@ int vp9_post_proc_frame(struct VP9Common *cm, "Failed to allocate post-processing buffer"); #endif - if (flags & VP9D_DEMACROBLOCK) { + if ((flags & VP9D_MFQE) && cm->current_video_frame >= 2 && + cm->postproc_state.last_frame_valid && + cm->postproc_state.last_base_qindex <= last_q_thresh && + cm->base_qindex - cm->postproc_state.last_base_qindex >= q_diff_thresh) { + vp9_mfqe(cm); + // TODO(jackychen): Consider whether enable deblocking by default + // if mfqe is enabled. Need to take both the quality and the speed + // into consideration. + if ((flags & VP9D_DEMACROBLOCK) || (flags & VP9D_DEBLOCK)) { + vp8_yv12_copy_frame(ppbuf, &cm->post_proc_buffer_int); + } + if ((flags & VP9D_DEMACROBLOCK) && cm->post_proc_buffer_int.buffer_alloc) { + deblock_and_de_macro_block(&cm->post_proc_buffer_int, ppbuf, + q + (ppflags->deblocking_level - 5) * 10, + 1, 0); + } else if (flags & VP9D_DEBLOCK) { + vp9_deblock(&cm->post_proc_buffer_int, ppbuf, q); + } else { + vp8_yv12_copy_frame(&cm->post_proc_buffer_int, ppbuf); + } + } else if (flags & VP9D_DEMACROBLOCK) { deblock_and_de_macro_block(cm->frame_to_show, ppbuf, q + (ppflags->deblocking_level - 5) * 10, 1, 0); } else if (flags & VP9D_DEBLOCK) { @@ -653,6 +723,9 @@ int vp9_post_proc_frame(struct VP9Common *cm, vp8_yv12_copy_frame(cm->frame_to_show, ppbuf); } + cm->postproc_state.last_base_qindex = cm->base_qindex; + cm->postproc_state.last_frame_valid = 1; + if (flags & VP9D_ADDNOISE) { const int noise_level = ppflags->noise_level; if (ppstate->last_q != q || @@ -673,6 +746,7 @@ int vp9_post_proc_frame(struct VP9Common *cm, dest->uv_width = dest->y_width >> cm->subsampling_x; dest->uv_height = dest->y_height >> cm->subsampling_y; + 
swap_mi_and_prev_mi(cm); return 0; } #endif diff --git a/vp9/common/vp9_postproc.h b/vp9/common/vp9_postproc.h index ebebc1ae3..035c9cdf8 100644 --- a/vp9/common/vp9_postproc.h +++ b/vp9/common/vp9_postproc.h @@ -14,6 +14,8 @@ #include "vpx_ports/mem.h" #include "vpx_scale/yv12config.h" +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_mfqe.h" #include "vp9/common/vp9_ppflags.h" #ifdef __cplusplus @@ -24,6 +26,10 @@ struct postproc_state { int last_q; int last_noise; char noise[3072]; + int last_base_qindex; + int last_frame_valid; + MODE_INFO *prev_mip; + MODE_INFO *prev_mi; DECLARE_ALIGNED(16, char, blackclamp[16]); DECLARE_ALIGNED(16, char, whiteclamp[16]); DECLARE_ALIGNED(16, char, bothclamp[16]); @@ -31,6 +37,8 @@ struct postproc_state { struct VP9Common; +#define MFQE_PRECISION 4 + int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest, vp9_ppflags_t *flags); diff --git a/vp9/common/vp9_ppflags.h b/vp9/common/vp9_ppflags.h index 1644a1bbb..12b989f43 100644 --- a/vp9/common/vp9_ppflags.h +++ b/vp9/common/vp9_ppflags.h @@ -26,7 +26,8 @@ enum { VP9D_DEBUG_TXT_RATE_INFO = 1 << 6, VP9D_DEBUG_DRAW_MV = 1 << 7, VP9D_DEBUG_CLR_BLK_MODES = 1 << 8, - VP9D_DEBUG_CLR_FRM_REF_BLKS = 1 << 9 + VP9D_DEBUG_CLR_FRM_REF_BLKS = 1 << 9, + VP9D_MFQE = 1 << 10 }; typedef struct { diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index 9414120f6..4b26c0593 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -72,6 +72,8 @@ VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_intrin_ss VP9_COMMON_SRCS-$(HAVE_AVX2) += common/x86/vp9_loopfilter_intrin_avx2.c VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.h VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c +VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.h +VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.c VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_loopfilter_mmx.asm VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_8t_sse2.asm 
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_bilinear_sse2.asm -- 2.40.0