}
}
-#if CONFIG_VP9_TEMPORAL_DENOISING
-// Check basic datarate targeting, for a single bitrate, when denoiser is on.
-TEST_P(DatarateTestVP9Large, DenoiserLevels) {
- cfg_.rc_buf_initial_sz = 500;
- cfg_.rc_buf_optimal_sz = 500;
- cfg_.rc_buf_sz = 1000;
- cfg_.rc_dropframe_thresh = 1;
- cfg_.rc_min_quantizer = 2;
- cfg_.rc_max_quantizer = 56;
- cfg_.rc_end_usage = VPX_CBR;
- cfg_.g_lag_in_frames = 0;
-
- ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- 30, 1, 0, 140);
-
- // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING),
- // there is only one denoiser mode: denoiserYonly (which is 1);
- // more modes may be added in the future.
- cfg_.rc_target_bitrate = 300;
- ResetModel();
- // Turn on the denoiser.
- denoiser_on_ = 1;
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
- << " The datarate for the file is lower than target by too much!";
- ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
- << " The datarate for the file is greater than target by too much!";
-}
-
-// Check basic datarate targeting, for a single bitrate, when denoiser is off
-// and on.
-TEST_P(DatarateTestVP9Large, DenoiserOffOn) {
- cfg_.rc_buf_initial_sz = 500;
- cfg_.rc_buf_optimal_sz = 500;
- cfg_.rc_buf_sz = 1000;
- cfg_.rc_dropframe_thresh = 1;
- cfg_.rc_min_quantizer = 2;
- cfg_.rc_max_quantizer = 56;
- cfg_.rc_end_usage = VPX_CBR;
- cfg_.g_lag_in_frames = 0;
-
- ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- 30, 1, 0, 299);
-
- // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING),
- // there is only one denoiser mode: denoiserYonly (which is 1);
- // more modes may be added in the future.
- cfg_.rc_target_bitrate = 300;
- ResetModel();
- // The denoiser is off by default.
- denoiser_on_ = 0;
- // Set the offon test flag.
- denoiser_offon_test_ = 1;
- denoiser_offon_period_ = 100;
- ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
- ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
- << " The datarate for the file is lower than target by too much!";
- ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
- << " The datarate for the file is greater than target by too much!";
-}
-#endif // CONFIG_VP9_TEMPORAL_DENOISING
-
class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
public:
}
#endif // CONFIG_LOOP_RESTORATION
-void vp10_free_postproc_buffers(VP10_COMMON *cm) {
-#if CONFIG_VP9_POSTPROC
- vpx_free_frame_buffer(&cm->post_proc_buffer);
- vpx_free_frame_buffer(&cm->post_proc_buffer_int);
-#else
- (void)cm;
-#endif
-}
-
void vp10_free_context_buffers(VP10_COMMON *cm) {
int i;
cm->free_mi(cm);
+++ /dev/null
-/*
- * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "./vpx_config.h"
-#include "./vp10_rtcd.h"
-#include "./vpx_dsp_rtcd.h"
-#include "./vpx_scale_rtcd.h"
-
-#include "vp10/common/onyxc_int.h"
-#include "vp10/common/postproc.h"
-
-// TODO(jackychen): Replace this function with SSE2 code. There is
-// an SSE2 implementation in vp8; consider how to share it
-// between vp8 and vp9.
-static void filter_by_weight(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride,
- int block_size, int src_weight) {
- const int dst_weight = (1 << MFQE_PRECISION) - src_weight;
- const int rounding_bit = 1 << (MFQE_PRECISION - 1);
- int r, c;
-
- for (r = 0; r < block_size; r++) {
- for (c = 0; c < block_size; c++) {
- dst[c] = (src[c] * src_weight + dst[c] * dst_weight + rounding_bit)
- >> MFQE_PRECISION;
- }
- src += src_stride;
- dst += dst_stride;
- }
-}
-
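The helper above is the core MFQE blend: with MFQE_PRECISION equal to 4 (as defined in postproc.h), src and dst are mixed in sixteenths with round-to-nearest. A minimal standalone sketch of the same per-pixel arithmetic, for illustration only (blend_pixel and the sample values are not part of the source):

```c
#include <assert.h>
#include <stdint.h>

#define MFQE_PRECISION 4 /* weights are expressed in 1/16ths */

/* Blend one pixel: src contributes src_weight/16, dst the remainder. */
static uint8_t blend_pixel(uint8_t src, uint8_t dst, int src_weight) {
  const int dst_weight = (1 << MFQE_PRECISION) - src_weight;
  const int rounding_bit = 1 << (MFQE_PRECISION - 1);
  return (uint8_t)((src * src_weight + dst * dst_weight + rounding_bit) >>
                   MFQE_PRECISION);
}

int main(void) {
  assert(blend_pixel(200, 100, 16) == 200); /* full source weight: unchanged */
  assert(blend_pixel(200, 100, 8) == 150);  /* even 50/50 mix */
  return 0;
}
```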
-void vp10_filter_by_weight8x8_c(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int src_weight) {
- filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight);
-}
-
-void vp10_filter_by_weight16x16_c(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride,
- int src_weight) {
- filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight);
-}
-
-static void filter_by_weight32x32(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int weight) {
- vp10_filter_by_weight16x16(src, src_stride, dst, dst_stride, weight);
- vp10_filter_by_weight16x16(src + 16, src_stride, dst + 16, dst_stride,
- weight);
- vp10_filter_by_weight16x16(src + src_stride * 16, src_stride,
- dst + dst_stride * 16, dst_stride, weight);
- vp10_filter_by_weight16x16(src + src_stride * 16 + 16, src_stride,
- dst + dst_stride * 16 + 16, dst_stride, weight);
-}
-
-static void filter_by_weight64x64(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int weight) {
- filter_by_weight32x32(src, src_stride, dst, dst_stride, weight);
- filter_by_weight32x32(src + 32, src_stride, dst + 32,
- dst_stride, weight);
- filter_by_weight32x32(src + src_stride * 32, src_stride,
- dst + dst_stride * 32, dst_stride, weight);
- filter_by_weight32x32(src + src_stride * 32 + 32, src_stride,
- dst + dst_stride * 32 + 32, dst_stride, weight);
-}
-
-static void apply_ifactor(const uint8_t *y, int y_stride, uint8_t *yd,
- int yd_stride, const uint8_t *u, const uint8_t *v,
- int uv_stride, uint8_t *ud, uint8_t *vd,
- int uvd_stride, BLOCK_SIZE block_size,
- int weight) {
- if (block_size == BLOCK_16X16) {
- vp10_filter_by_weight16x16(y, y_stride, yd, yd_stride, weight);
- vp10_filter_by_weight8x8(u, uv_stride, ud, uvd_stride, weight);
- vp10_filter_by_weight8x8(v, uv_stride, vd, uvd_stride, weight);
- } else if (block_size == BLOCK_32X32) {
- filter_by_weight32x32(y, y_stride, yd, yd_stride, weight);
- vp10_filter_by_weight16x16(u, uv_stride, ud, uvd_stride, weight);
- vp10_filter_by_weight16x16(v, uv_stride, vd, uvd_stride, weight);
- } else if (block_size == BLOCK_64X64) {
- filter_by_weight64x64(y, y_stride, yd, yd_stride, weight);
- filter_by_weight32x32(u, uv_stride, ud, uvd_stride, weight);
- filter_by_weight32x32(v, uv_stride, vd, uvd_stride, weight);
- }
-}
-
-// TODO(jackychen): Determine whether to replace it with assembly code.
-static void copy_mem8x8(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride) {
- int r;
- for (r = 0; r < 8; r++) {
- memcpy(dst, src, 8);
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static void copy_mem16x16(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride) {
- int r;
- for (r = 0; r < 16; r++) {
- memcpy(dst, src, 16);
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static void copy_mem32x32(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride) {
- copy_mem16x16(src, src_stride, dst, dst_stride);
- copy_mem16x16(src + 16, src_stride, dst + 16, dst_stride);
- copy_mem16x16(src + src_stride * 16, src_stride,
- dst + dst_stride * 16, dst_stride);
- copy_mem16x16(src + src_stride * 16 + 16, src_stride,
- dst + dst_stride * 16 + 16, dst_stride);
-}
-
-void copy_mem64x64(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride) {
- copy_mem32x32(src, src_stride, dst, dst_stride);
- copy_mem32x32(src + 32, src_stride, dst + 32, dst_stride);
- copy_mem32x32(src + src_stride * 32, src_stride,
- dst + dst_stride * 32, dst_stride);
- copy_mem32x32(src + src_stride * 32 + 32, src_stride,
- dst + dst_stride * 32 + 32, dst_stride);
-}
-
-static void copy_block(const uint8_t *y, const uint8_t *u, const uint8_t *v,
- int y_stride, int uv_stride, uint8_t *yd, uint8_t *ud,
- uint8_t *vd, int yd_stride, int uvd_stride,
- BLOCK_SIZE bs) {
- if (bs == BLOCK_16X16) {
- copy_mem16x16(y, y_stride, yd, yd_stride);
- copy_mem8x8(u, uv_stride, ud, uvd_stride);
- copy_mem8x8(v, uv_stride, vd, uvd_stride);
- } else if (bs == BLOCK_32X32) {
- copy_mem32x32(y, y_stride, yd, yd_stride);
- copy_mem16x16(u, uv_stride, ud, uvd_stride);
- copy_mem16x16(v, uv_stride, vd, uvd_stride);
- } else {
- copy_mem64x64(y, y_stride, yd, yd_stride);
- copy_mem32x32(u, uv_stride, ud, uvd_stride);
- copy_mem32x32(v, uv_stride, vd, uvd_stride);
- }
-}
-
-static void get_thr(BLOCK_SIZE bs, int qdiff, int *sad_thr, int *vdiff_thr) {
- const int adj = qdiff >> MFQE_PRECISION;
- if (bs == BLOCK_16X16) {
- *sad_thr = 7 + adj;
- } else if (bs == BLOCK_32X32) {
- *sad_thr = 6 + adj;
- } else { // BLOCK_64X64
- *sad_thr = 5 + adj;
- }
- *vdiff_thr = 125 + qdiff;
-}
-
-static void mfqe_block(BLOCK_SIZE bs, const uint8_t *y, const uint8_t *u,
- const uint8_t *v, int y_stride, int uv_stride,
- uint8_t *yd, uint8_t *ud, uint8_t *vd, int yd_stride,
- int uvd_stride, int qdiff) {
- int sad, sad_thr, vdiff, vdiff_thr;
- uint32_t sse;
-
- get_thr(bs, qdiff, &sad_thr, &vdiff_thr);
-
- if (bs == BLOCK_16X16) {
- vdiff = (vpx_variance16x16(y, y_stride, yd, yd_stride, &sse) + 128) >> 8;
- sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
- } else if (bs == BLOCK_32X32) {
- vdiff = (vpx_variance32x32(y, y_stride, yd, yd_stride, &sse) + 512) >> 10;
- sad = (vpx_sad32x32(y, y_stride, yd, yd_stride) + 512) >> 10;
- } else /* if (bs == BLOCK_64X64) */ {
- vdiff = (vpx_variance64x64(y, y_stride, yd, yd_stride, &sse) + 2048) >> 12;
- sad = (vpx_sad64x64(y, y_stride, yd, yd_stride) + 2048) >> 12;
- }
-
- // Requiring vdiff > sad * 3 ensures vdiff is not too small; otherwise
- // the difference might come from a lighting change in a smooth area,
- // where it is dangerous to do MFQE.
- if (sad > 1 && vdiff > sad * 3) {
- const int weight = 1 << MFQE_PRECISION;
- int ifactor = weight * sad * vdiff / (sad_thr * vdiff_thr);
- // When ifactor equals weight, no MFQE is done.
- if (ifactor > weight) {
- ifactor = weight;
- }
- apply_ifactor(y, y_stride, yd, yd_stride, u, v, uv_stride, ud, vd,
- uvd_stride, bs, ifactor);
- } else {
- // Copy the block from current frame (i.e., no mfqe is done).
- copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd,
- yd_stride, uvd_stride, bs);
- }
-}
-
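For a sense of scale, here is a hedged worked example of the ifactor computation above, using made-up block statistics (the numbers are illustrative, not from any real sequence):

```c
#include <assert.h>

#define MFQE_PRECISION 4

int main(void) {
  /* Illustrative 16x16 block with qdiff = 16, so get_thr() would give
   * sad_thr = 7 + (qdiff >> MFQE_PRECISION) = 8 and vdiff_thr = 125 + 16. */
  const int weight = 1 << MFQE_PRECISION;
  const int sad = 6, vdiff = 60;          /* vdiff > 3 * sad, so MFQE runs */
  const int sad_thr = 8, vdiff_thr = 141;
  int ifactor = weight * sad * vdiff / (sad_thr * vdiff_thr);
  if (ifactor > weight) ifactor = weight;
  /* The current block keeps ifactor/16 of the blend; the higher-quality
   * co-located block from the previous frame supplies the other 11/16. */
  assert(ifactor == 5);
  return 0;
}
```

Larger sad or vdiff pushes ifactor toward 16, at which point the block is left untouched (no MFQE).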
-static int mfqe_decision(MODE_INFO *mi, BLOCK_SIZE cur_bs) {
- // Check the motion in the current block (for an inter frame),
- // or in the co-located block in the last frame (for a keyframe).
- const int mv_len_square = mi->mbmi.mv[0].as_mv.row *
- mi->mbmi.mv[0].as_mv.row +
- mi->mbmi.mv[0].as_mv.col *
- mi->mbmi.mv[0].as_mv.col;
- const int mv_threshold = 100;
- return mi->mbmi.mode >= NEARESTMV && // Not an intra block
- cur_bs >= BLOCK_16X16 &&
- mv_len_square <= mv_threshold;
-}
-
-// Process each partition in a superblock, recursively.
-static void mfqe_partition(VP10_COMMON *cm, MODE_INFO *mi, BLOCK_SIZE bs,
- const uint8_t *y, const uint8_t *u,
- const uint8_t *v, int y_stride, int uv_stride,
- uint8_t *yd, uint8_t *ud, uint8_t *vd,
- int yd_stride, int uvd_stride) {
- int mi_offset, y_offset, uv_offset;
- const BLOCK_SIZE cur_bs = mi->mbmi.sb_type;
- const int qdiff = cm->base_qindex - cm->postproc_state.last_base_qindex;
- const int bsl = b_width_log2_lookup[bs];
- PARTITION_TYPE partition = partition_lookup[bsl][cur_bs];
- const BLOCK_SIZE subsize = get_subsize(bs, partition);
-
- if (cur_bs < BLOCK_8X8) {
- // If there are blocks smaller than 8x8, they must be on the boundary.
- return;
- }
- // No MFQE on blocks smaller than 16x16
- if (bs == BLOCK_16X16) {
- partition = PARTITION_NONE;
- }
- if (bs == BLOCK_64X64) {
- mi_offset = 4;
- y_offset = 32;
- uv_offset = 16;
- } else {
- mi_offset = 2;
- y_offset = 16;
- uv_offset = 8;
- }
- switch (partition) {
- BLOCK_SIZE mfqe_bs, bs_tmp;
- case PARTITION_HORZ:
- if (bs == BLOCK_64X64) {
- mfqe_bs = BLOCK_64X32;
- bs_tmp = BLOCK_32X32;
- } else {
- mfqe_bs = BLOCK_32X16;
- bs_tmp = BLOCK_16X16;
- }
- if (mfqe_decision(mi, mfqe_bs)) {
- // Do mfqe on the first square partition.
- mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride,
- yd, ud, vd, yd_stride, uvd_stride, qdiff);
- // Do mfqe on the second square partition.
- mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset,
- y_stride, uv_stride, yd + y_offset, ud + uv_offset,
- vd + uv_offset, yd_stride, uvd_stride, qdiff);
- }
- if (mfqe_decision(mi + mi_offset * cm->mi_stride, mfqe_bs)) {
- // Do mfqe on the first square partition.
- mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride,
- v + uv_offset * uv_stride, y_stride, uv_stride,
- yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
- vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff);
- // Do mfqe on the second square partition.
- mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset,
- u + uv_offset * uv_stride + uv_offset,
- v + uv_offset * uv_stride + uv_offset, y_stride,
- uv_stride, yd + y_offset * yd_stride + y_offset,
- ud + uv_offset * uvd_stride + uv_offset,
- vd + uv_offset * uvd_stride + uv_offset,
- yd_stride, uvd_stride, qdiff);
- }
- break;
- case PARTITION_VERT:
- if (bs == BLOCK_64X64) {
- mfqe_bs = BLOCK_32X64;
- bs_tmp = BLOCK_32X32;
- } else {
- mfqe_bs = BLOCK_16X32;
- bs_tmp = BLOCK_16X16;
- }
- if (mfqe_decision(mi, mfqe_bs)) {
- // Do mfqe on the first square partition.
- mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride,
- yd, ud, vd, yd_stride, uvd_stride, qdiff);
- // Do mfqe on the second square partition.
- mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride,
- v + uv_offset * uv_stride, y_stride, uv_stride,
- yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
- vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff);
- }
- if (mfqe_decision(mi + mi_offset, mfqe_bs)) {
- // Do mfqe on the first square partition.
- mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset,
- y_stride, uv_stride, yd + y_offset, ud + uv_offset,
- vd + uv_offset, yd_stride, uvd_stride, qdiff);
- // Do mfqe on the second square partition.
- mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset,
- u + uv_offset * uv_stride + uv_offset,
- v + uv_offset * uv_stride + uv_offset, y_stride,
- uv_stride, yd + y_offset * yd_stride + y_offset,
- ud + uv_offset * uvd_stride + uv_offset,
- vd + uv_offset * uvd_stride + uv_offset,
- yd_stride, uvd_stride, qdiff);
- }
- break;
- case PARTITION_NONE:
- if (mfqe_decision(mi, cur_bs)) {
- // Do mfqe on this partition.
- mfqe_block(cur_bs, y, u, v, y_stride, uv_stride,
- yd, ud, vd, yd_stride, uvd_stride, qdiff);
- } else {
- // Copy the block from the current frame (i.e., no mfqe is done).
- copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd,
- yd_stride, uvd_stride, bs);
- }
- break;
- case PARTITION_SPLIT:
- // Recursion on four square partitions, e.g. if bs is 64X64,
- // then look into four 32X32 blocks in it.
- mfqe_partition(cm, mi, subsize, y, u, v, y_stride, uv_stride, yd, ud, vd,
- yd_stride, uvd_stride);
- mfqe_partition(cm, mi + mi_offset, subsize, y + y_offset, u + uv_offset,
- v + uv_offset, y_stride, uv_stride, yd + y_offset,
- ud + uv_offset, vd + uv_offset, yd_stride, uvd_stride);
- mfqe_partition(cm, mi + mi_offset * cm->mi_stride, subsize,
- y + y_offset * y_stride, u + uv_offset * uv_stride,
- v + uv_offset * uv_stride, y_stride, uv_stride,
- yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
- vd + uv_offset * uvd_stride, yd_stride, uvd_stride);
- mfqe_partition(cm, mi + mi_offset * cm->mi_stride + mi_offset,
- subsize, y + y_offset * y_stride + y_offset,
- u + uv_offset * uv_stride + uv_offset,
- v + uv_offset * uv_stride + uv_offset, y_stride,
- uv_stride, yd + y_offset * yd_stride + y_offset,
- ud + uv_offset * uvd_stride + uv_offset,
- vd + uv_offset * uvd_stride + uv_offset,
- yd_stride, uvd_stride);
- break;
- default:
- assert(0);
- }
-}
-
-void vp10_mfqe(VP10_COMMON *cm) {
- int mi_row, mi_col;
- // Current decoded frame.
- const YV12_BUFFER_CONFIG *show = cm->frame_to_show;
- // Last decoded frame; it will also store the MFQE result.
- YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;
-
-#if CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
- // TODO(any): Fix for ext partition types and 128x128 superblocks.
- assert(0);
-#endif // CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
-
- // Loop through each super block.
- for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MAX_MIB_SIZE) {
- for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
- MODE_INFO *mi;
- MODE_INFO *mi_local = cm->mi + (mi_row * cm->mi_stride + mi_col);
- // Motion Info in last frame.
- MODE_INFO *mi_prev = cm->postproc_state.prev_mi +
- (mi_row * cm->mi_stride + mi_col);
- const uint32_t y_stride = show->y_stride;
- const uint32_t uv_stride = show->uv_stride;
- const uint32_t yd_stride = dest->y_stride;
- const uint32_t uvd_stride = dest->uv_stride;
- const uint32_t row_offset_y = mi_row << 3;
- const uint32_t row_offset_uv = mi_row << 2;
- const uint32_t col_offset_y = mi_col << 3;
- const uint32_t col_offset_uv = mi_col << 2;
- const uint8_t *y = show->y_buffer + row_offset_y * y_stride +
- col_offset_y;
- const uint8_t *u = show->u_buffer + row_offset_uv * uv_stride +
- col_offset_uv;
- const uint8_t *v = show->v_buffer + row_offset_uv * uv_stride +
- col_offset_uv;
- uint8_t *yd = dest->y_buffer + row_offset_y * yd_stride + col_offset_y;
- uint8_t *ud = dest->u_buffer + row_offset_uv * uvd_stride +
- col_offset_uv;
- uint8_t *vd = dest->v_buffer + row_offset_uv * uvd_stride +
- col_offset_uv;
- if (frame_is_intra_only(cm)) {
- mi = mi_prev;
- } else {
- mi = mi_local;
- }
- mfqe_partition(cm, mi, BLOCK_64X64, y, u, v, y_stride, uv_stride, yd, ud,
- vd, yd_stride, uvd_stride);
- }
- }
-}
+++ /dev/null
-/*
- * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP10_COMMON_MFQE_H_
-#define VP10_COMMON_MFQE_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Multiframe Quality Enhancement.
-// The aim of MFQE is to replace pixel blocks in the current frame with
-// the correlated, higher-quality pixel blocks from the last frame.
-// Replacement is applied only to stationary blocks, determined by checking
-// the motion of the blocks and other conditions such as the SAD between
-// the current block and the correlated block, the variance of the block
-// difference, etc.
-void vp10_mfqe(struct VP10Common *cm);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // VP10_COMMON_MFQE_H_
+++ /dev/null
-/*
- * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "./vp10_rtcd.h"
-#include "vp10/common/onyxc_int.h"
-#include "vpx_dsp/mips/macros_msa.h"
-
-static void filter_by_weight8x8_msa(const uint8_t *src_ptr, int32_t src_stride,
- uint8_t *dst_ptr, int32_t dst_stride,
- int32_t src_weight) {
- int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
- int32_t row;
- uint64_t src0_d, src1_d, dst0_d, dst1_d;
- v16i8 src0 = { 0 };
- v16i8 src1 = { 0 };
- v16i8 dst0 = { 0 };
- v16i8 dst1 = { 0 };
- v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l;
-
- src_wt = __msa_fill_h(src_weight);
- dst_wt = __msa_fill_h(dst_weight);
-
- for (row = 2; row--;) {
- LD2(src_ptr, src_stride, src0_d, src1_d);
- src_ptr += (2 * src_stride);
- LD2(dst_ptr, dst_stride, dst0_d, dst1_d);
- INSERT_D2_SB(src0_d, src1_d, src0);
- INSERT_D2_SB(dst0_d, dst1_d, dst0);
-
- LD2(src_ptr, src_stride, src0_d, src1_d);
- src_ptr += (2 * src_stride);
- LD2((dst_ptr + 2 * dst_stride), dst_stride, dst0_d, dst1_d);
- INSERT_D2_SB(src0_d, src1_d, src1);
- INSERT_D2_SB(dst0_d, dst1_d, dst1);
-
- UNPCK_UB_SH(src0, src_r, src_l);
- UNPCK_UB_SH(dst0, dst_r, dst_l);
- res_h_r = (src_r * src_wt);
- res_h_r += (dst_r * dst_wt);
- res_h_l = (src_l * src_wt);
- res_h_l += (dst_l * dst_wt);
- SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
- dst0 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
- ST8x2_UB(dst0, dst_ptr, dst_stride);
- dst_ptr += (2 * dst_stride);
-
- UNPCK_UB_SH(src1, src_r, src_l);
- UNPCK_UB_SH(dst1, dst_r, dst_l);
- res_h_r = (src_r * src_wt);
- res_h_r += (dst_r * dst_wt);
- res_h_l = (src_l * src_wt);
- res_h_l += (dst_l * dst_wt);
- SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
- dst1 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
- ST8x2_UB(dst1, dst_ptr, dst_stride);
- dst_ptr += (2 * dst_stride);
- }
-}
-
-static void filter_by_weight16x16_msa(const uint8_t *src_ptr,
- int32_t src_stride,
- uint8_t *dst_ptr,
- int32_t dst_stride,
- int32_t src_weight) {
- int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
- int32_t row;
- v16i8 src0, src1, src2, src3, dst0, dst1, dst2, dst3;
- v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l;
-
- src_wt = __msa_fill_h(src_weight);
- dst_wt = __msa_fill_h(dst_weight);
-
- for (row = 4; row--;) {
- LD_SB4(src_ptr, src_stride, src0, src1, src2, src3);
- src_ptr += (4 * src_stride);
- LD_SB4(dst_ptr, dst_stride, dst0, dst1, dst2, dst3);
-
- UNPCK_UB_SH(src0, src_r, src_l);
- UNPCK_UB_SH(dst0, dst_r, dst_l);
- res_h_r = (src_r * src_wt);
- res_h_r += (dst_r * dst_wt);
- res_h_l = (src_l * src_wt);
- res_h_l += (dst_l * dst_wt);
- SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
- PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
- dst_ptr += dst_stride;
-
- UNPCK_UB_SH(src1, src_r, src_l);
- UNPCK_UB_SH(dst1, dst_r, dst_l);
- res_h_r = (src_r * src_wt);
- res_h_r += (dst_r * dst_wt);
- res_h_l = (src_l * src_wt);
- res_h_l += (dst_l * dst_wt);
- SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
- PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
- dst_ptr += dst_stride;
-
- UNPCK_UB_SH(src2, src_r, src_l);
- UNPCK_UB_SH(dst2, dst_r, dst_l);
- res_h_r = (src_r * src_wt);
- res_h_r += (dst_r * dst_wt);
- res_h_l = (src_l * src_wt);
- res_h_l += (dst_l * dst_wt);
- SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
- PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
- dst_ptr += dst_stride;
-
- UNPCK_UB_SH(src3, src_r, src_l);
- UNPCK_UB_SH(dst3, dst_r, dst_l);
- res_h_r = (src_r * src_wt);
- res_h_r += (dst_r * dst_wt);
- res_h_l = (src_l * src_wt);
- res_h_l += (dst_l * dst_wt);
- SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
- PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
- dst_ptr += dst_stride;
- }
-}
-
-void vp10_filter_by_weight8x8_msa(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride,
- int src_weight) {
- filter_by_weight8x8_msa(src, src_stride, dst, dst_stride, src_weight);
-}
-
-void vp10_filter_by_weight16x16_msa(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride,
- int src_weight) {
- filter_by_weight16x16_msa(src, src_stride, dst, dst_stride, src_weight);
-}
#include "vp10/common/tile_common.h"
#include "vp10/common/restoration.h"
-#if CONFIG_VP9_POSTPROC
-#include "vp10/common/postproc.h"
-#endif
-
#ifdef __cplusplus
extern "C" {
#endif
int new_fb_idx;
-#if CONFIG_VP9_POSTPROC
- YV12_BUFFER_CONFIG post_proc_buffer;
- YV12_BUFFER_CONFIG post_proc_buffer_int;
-#endif
#if CONFIG_LOOP_RESTORATION
YV12_BUFFER_CONFIG tmp_loop_buf;
#endif // CONFIG_LOOP_RESTORATION
vpx_bit_depth_t bit_depth;
vpx_bit_depth_t dequant_bit_depth; // bit_depth of current dequantizer
-#if CONFIG_VP9_POSTPROC
- struct postproc_state postproc_state;
-#endif
-
int error_resilient_mode;
#if !CONFIG_EXT_TILE
+++ /dev/null
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-#include "./vpx_config.h"
-#include "./vpx_dsp_rtcd.h"
-#include "./vpx_scale_rtcd.h"
-#include "./vp10_rtcd.h"
-
-#include "vpx_dsp/vpx_dsp_common.h"
-#include "vpx_ports/mem.h"
-#include "vpx_ports/system_state.h"
-#include "vpx_scale/vpx_scale.h"
-#include "vpx_scale/yv12config.h"
-
-#include "vp10/common/onyxc_int.h"
-#include "vp10/common/postproc.h"
-#include "vp10/common/textblit.h"
-
-#if CONFIG_VP9_POSTPROC
-static const short kernel5[] = {
- 1, 1, 4, 1, 1
-};
-
-const short vp10_rv[] = {
- 8, 5, 2, 2, 8, 12, 4, 9, 8, 3,
- 0, 3, 9, 0, 0, 0, 8, 3, 14, 4,
- 10, 1, 11, 14, 1, 14, 9, 6, 12, 11,
- 8, 6, 10, 0, 0, 8, 9, 0, 3, 14,
- 8, 11, 13, 4, 2, 9, 0, 3, 9, 6,
- 1, 2, 3, 14, 13, 1, 8, 2, 9, 7,
- 3, 3, 1, 13, 13, 6, 6, 5, 2, 7,
- 11, 9, 11, 8, 7, 3, 2, 0, 13, 13,
- 14, 4, 12, 5, 12, 10, 8, 10, 13, 10,
- 4, 14, 4, 10, 0, 8, 11, 1, 13, 7,
- 7, 14, 6, 14, 13, 2, 13, 5, 4, 4,
- 0, 10, 0, 5, 13, 2, 12, 7, 11, 13,
- 8, 0, 4, 10, 7, 2, 7, 2, 2, 5,
- 3, 4, 7, 3, 3, 14, 14, 5, 9, 13,
- 3, 14, 3, 6, 3, 0, 11, 8, 13, 1,
- 13, 1, 12, 0, 10, 9, 7, 6, 2, 8,
- 5, 2, 13, 7, 1, 13, 14, 7, 6, 7,
- 9, 6, 10, 11, 7, 8, 7, 5, 14, 8,
- 4, 4, 0, 8, 7, 10, 0, 8, 14, 11,
- 3, 12, 5, 7, 14, 3, 14, 5, 2, 6,
- 11, 12, 12, 8, 0, 11, 13, 1, 2, 0,
- 5, 10, 14, 7, 8, 0, 4, 11, 0, 8,
- 0, 3, 10, 5, 8, 0, 11, 6, 7, 8,
- 10, 7, 13, 9, 2, 5, 1, 5, 10, 2,
- 4, 3, 5, 6, 10, 8, 9, 4, 11, 14,
- 0, 10, 0, 5, 13, 2, 12, 7, 11, 13,
- 8, 0, 4, 10, 7, 2, 7, 2, 2, 5,
- 3, 4, 7, 3, 3, 14, 14, 5, 9, 13,
- 3, 14, 3, 6, 3, 0, 11, 8, 13, 1,
- 13, 1, 12, 0, 10, 9, 7, 6, 2, 8,
- 5, 2, 13, 7, 1, 13, 14, 7, 6, 7,
- 9, 6, 10, 11, 7, 8, 7, 5, 14, 8,
- 4, 4, 0, 8, 7, 10, 0, 8, 14, 11,
- 3, 12, 5, 7, 14, 3, 14, 5, 2, 6,
- 11, 12, 12, 8, 0, 11, 13, 1, 2, 0,
- 5, 10, 14, 7, 8, 0, 4, 11, 0, 8,
- 0, 3, 10, 5, 8, 0, 11, 6, 7, 8,
- 10, 7, 13, 9, 2, 5, 1, 5, 10, 2,
- 4, 3, 5, 6, 10, 8, 9, 4, 11, 14,
- 3, 8, 3, 7, 8, 5, 11, 4, 12, 3,
- 11, 9, 14, 8, 14, 13, 4, 3, 1, 2,
- 14, 6, 5, 4, 4, 11, 4, 6, 2, 1,
- 5, 8, 8, 12, 13, 5, 14, 10, 12, 13,
- 0, 9, 5, 5, 11, 10, 13, 9, 10, 13,
-};
-
-static const uint8_t q_diff_thresh = 20;
-static const uint8_t last_q_thresh = 170;
-
-void vp10_post_proc_down_and_across_c(const uint8_t *src_ptr,
- uint8_t *dst_ptr,
- int src_pixels_per_line,
- int dst_pixels_per_line,
- int rows,
- int cols,
- int flimit) {
- uint8_t const *p_src;
- uint8_t *p_dst;
- int row, col, i, v, kernel;
- int pitch = src_pixels_per_line;
- uint8_t d[8];
- (void)dst_pixels_per_line;
-
- for (row = 0; row < rows; row++) {
- /* post_proc_down for one row */
- p_src = src_ptr;
- p_dst = dst_ptr;
-
- for (col = 0; col < cols; col++) {
- kernel = 4;
- v = p_src[col];
-
- for (i = -2; i <= 2; i++) {
- if (abs(v - p_src[col + i * pitch]) > flimit)
- goto down_skip_convolve;
-
- kernel += kernel5[2 + i] * p_src[col + i * pitch];
- }
-
- v = (kernel >> 3);
- down_skip_convolve:
- p_dst[col] = v;
- }
-
- /* now post_proc_across */
- p_src = dst_ptr;
- p_dst = dst_ptr;
-
- for (i = 0; i < 8; i++)
- d[i] = p_src[i];
-
- for (col = 0; col < cols; col++) {
- kernel = 4;
- v = p_src[col];
-
- d[col & 7] = v;
-
- for (i = -2; i <= 2; i++) {
- if (abs(v - p_src[col + i]) > flimit)
- goto across_skip_convolve;
-
- kernel += kernel5[2 + i] * p_src[col + i];
- }
-
- d[col & 7] = (kernel >> 3);
- across_skip_convolve:
-
- if (col >= 2)
- p_dst[col - 2] = d[(col - 2) & 7];
- }
-
- /* handle the last two pixels */
- p_dst[col - 2] = d[(col - 2) & 7];
- p_dst[col - 1] = d[(col - 1) & 7];
-
-
- /* next row */
- src_ptr += pitch;
- dst_ptr += pitch;
- }
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-void vp10_highbd_post_proc_down_and_across_c(const uint16_t *src_ptr,
- uint16_t *dst_ptr,
- int src_pixels_per_line,
- int dst_pixels_per_line,
- int rows,
- int cols,
- int flimit) {
- uint16_t const *p_src;
- uint16_t *p_dst;
- int row, col, i, v, kernel;
- int pitch = src_pixels_per_line;
- uint16_t d[8];
-
- for (row = 0; row < rows; row++) {
- // post_proc_down for one row.
- p_src = src_ptr;
- p_dst = dst_ptr;
-
- for (col = 0; col < cols; col++) {
- kernel = 4;
- v = p_src[col];
-
- for (i = -2; i <= 2; i++) {
- if (abs(v - p_src[col + i * pitch]) > flimit)
- goto down_skip_convolve;
-
- kernel += kernel5[2 + i] * p_src[col + i * pitch];
- }
-
- v = (kernel >> 3);
-
- down_skip_convolve:
- p_dst[col] = v;
- }
-
- /* now post_proc_across */
- p_src = dst_ptr;
- p_dst = dst_ptr;
-
- for (i = 0; i < 8; i++)
- d[i] = p_src[i];
-
- for (col = 0; col < cols; col++) {
- kernel = 4;
- v = p_src[col];
-
- d[col & 7] = v;
-
- for (i = -2; i <= 2; i++) {
- if (abs(v - p_src[col + i]) > flimit)
- goto across_skip_convolve;
-
- kernel += kernel5[2 + i] * p_src[col + i];
- }
-
- d[col & 7] = (kernel >> 3);
-
- across_skip_convolve:
- if (col >= 2)
- p_dst[col - 2] = d[(col - 2) & 7];
- }
-
- /* handle the last two pixels */
- p_dst[col - 2] = d[(col - 2) & 7];
- p_dst[col - 1] = d[(col - 1) & 7];
-
-
- /* next row */
- src_ptr += pitch;
- dst_ptr += dst_pixels_per_line;
- }
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
-static int q2mbl(int x) {
- if (x < 20) x = 20;
-
- x = 50 + (x - 50) * 10 / 8;
- return x * x / 3;
-}
-
-void vp10_mbpost_proc_across_ip_c(uint8_t *src, int pitch,
- int rows, int cols, int flimit) {
- int r, c, i;
- uint8_t *s = src;
- uint8_t d[16];
-
- for (r = 0; r < rows; r++) {
- int sumsq = 0;
- int sum = 0;
-
- for (i = -8; i <= 6; i++) {
- sumsq += s[i] * s[i];
- sum += s[i];
- d[i + 8] = 0;
- }
-
- for (c = 0; c < cols + 8; c++) {
- int x = s[c + 7] - s[c - 8];
- int y = s[c + 7] + s[c - 8];
-
- sum += x;
- sumsq += x * y;
-
- d[c & 15] = s[c];
-
- if (sumsq * 15 - sum * sum < flimit) {
- d[c & 15] = (8 + sum + s[c]) >> 4;
- }
-
- s[c - 8] = d[(c - 8) & 15];
- }
- s += pitch;
- }
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-void vp10_highbd_mbpost_proc_across_ip_c(uint16_t *src, int pitch,
- int rows, int cols, int flimit) {
- int r, c, i;
-
- uint16_t *s = src;
- uint16_t d[16];
-
-
- for (r = 0; r < rows; r++) {
- int sumsq = 0;
- int sum = 0;
-
- for (i = -8; i <= 6; i++) {
- sumsq += s[i] * s[i];
- sum += s[i];
- d[i + 8] = 0;
- }
-
- for (c = 0; c < cols + 8; c++) {
- int x = s[c + 7] - s[c - 8];
- int y = s[c + 7] + s[c - 8];
-
- sum += x;
- sumsq += x * y;
-
- d[c & 15] = s[c];
-
- if (sumsq * 15 - sum * sum < flimit) {
- d[c & 15] = (8 + sum + s[c]) >> 4;
- }
-
- s[c - 8] = d[(c - 8) & 15];
- }
-
- s += pitch;
- }
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
-void vp10_mbpost_proc_down_c(uint8_t *dst, int pitch,
- int rows, int cols, int flimit) {
- int r, c, i;
- const short *rv3 = &vp10_rv[63 & rand()]; // NOLINT
-
- for (c = 0; c < cols; c++) {
- uint8_t *s = &dst[c];
- int sumsq = 0;
- int sum = 0;
- uint8_t d[16];
- const short *rv2 = rv3 + ((c * 17) & 127);
-
- for (i = -8; i <= 6; i++) {
- sumsq += s[i * pitch] * s[i * pitch];
- sum += s[i * pitch];
- }
-
- for (r = 0; r < rows + 8; r++) {
- sumsq += s[7 * pitch] * s[ 7 * pitch] - s[-8 * pitch] * s[-8 * pitch];
- sum += s[7 * pitch] - s[-8 * pitch];
- d[r & 15] = s[0];
-
- if (sumsq * 15 - sum * sum < flimit) {
- d[r & 15] = (rv2[r & 127] + sum + s[0]) >> 4;
- }
-
- s[-8 * pitch] = d[(r - 8) & 15];
- s += pitch;
- }
- }
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-void vp10_highbd_mbpost_proc_down_c(uint16_t *dst, int pitch,
- int rows, int cols, int flimit) {
- int r, c, i;
- const int16_t *rv3 = &vp10_rv[63 & rand()]; // NOLINT
-
- for (c = 0; c < cols; c++) {
- uint16_t *s = &dst[c];
- int sumsq = 0;
- int sum = 0;
- uint16_t d[16];
- const int16_t *rv2 = rv3 + ((c * 17) & 127);
-
- for (i = -8; i <= 6; i++) {
- sumsq += s[i * pitch] * s[i * pitch];
- sum += s[i * pitch];
- }
-
- for (r = 0; r < rows + 8; r++) {
- sumsq += s[7 * pitch] * s[ 7 * pitch] - s[-8 * pitch] * s[-8 * pitch];
- sum += s[7 * pitch] - s[-8 * pitch];
- d[r & 15] = s[0];
-
- if (sumsq * 15 - sum * sum < flimit) {
- d[r & 15] = (rv2[r & 127] + sum + s[0]) >> 4;
- }
-
- s[-8 * pitch] = d[(r - 8) & 15];
- s += pitch;
- }
- }
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
-static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *post,
- int q,
- int low_var_thresh,
- int flag) {
- double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065;
- int ppl = (int)(level + .5);
- (void) low_var_thresh;
- (void) flag;
-
-#if CONFIG_VP9_HIGHBITDEPTH
- if (source->flags & YV12_FLAG_HIGHBITDEPTH) {
- vp10_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(source->y_buffer),
- CONVERT_TO_SHORTPTR(post->y_buffer),
- source->y_stride, post->y_stride,
- source->y_height, source->y_width,
- ppl);
-
- vp10_highbd_mbpost_proc_across_ip(CONVERT_TO_SHORTPTR(post->y_buffer),
- post->y_stride, post->y_height,
- post->y_width, q2mbl(q));
-
- vp10_highbd_mbpost_proc_down(CONVERT_TO_SHORTPTR(post->y_buffer),
- post->y_stride, post->y_height,
- post->y_width, q2mbl(q));
-
- vp10_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(source->u_buffer),
- CONVERT_TO_SHORTPTR(post->u_buffer),
- source->uv_stride, post->uv_stride,
- source->uv_height, source->uv_width,
- ppl);
- vp10_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(source->v_buffer),
- CONVERT_TO_SHORTPTR(post->v_buffer),
- source->uv_stride, post->uv_stride,
- source->uv_height, source->uv_width,
- ppl);
- } else {
- vp10_post_proc_down_and_across(source->y_buffer, post->y_buffer,
- source->y_stride, post->y_stride,
- source->y_height, source->y_width, ppl);
-
- vp10_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height,
- post->y_width, q2mbl(q));
-
- vp10_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height,
- post->y_width, q2mbl(q));
-
- vp10_post_proc_down_and_across(source->u_buffer, post->u_buffer,
- source->uv_stride, post->uv_stride,
- source->uv_height, source->uv_width, ppl);
- vp10_post_proc_down_and_across(source->v_buffer, post->v_buffer,
- source->uv_stride, post->uv_stride,
- source->uv_height, source->uv_width, ppl);
- }
-#else
- vp10_post_proc_down_and_across(source->y_buffer, post->y_buffer,
- source->y_stride, post->y_stride,
- source->y_height, source->y_width, ppl);
-
- vp10_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height,
- post->y_width, q2mbl(q));
-
- vp10_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height,
- post->y_width, q2mbl(q));
-
- vp10_post_proc_down_and_across(source->u_buffer, post->u_buffer,
- source->uv_stride, post->uv_stride,
- source->uv_height, source->uv_width, ppl);
- vp10_post_proc_down_and_across(source->v_buffer, post->v_buffer,
- source->uv_stride, post->uv_stride,
- source->uv_height, source->uv_width, ppl);
-#endif // CONFIG_VP9_HIGHBITDEPTH
-}
-
-void vp10_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
- int q) {
- const int ppl = (int)(6.0e-05 * q * q * q - 0.0067 * q * q + 0.306 * q
- + 0.0065 + 0.5);
- int i;
-
- const uint8_t *const srcs[3] = {src->y_buffer, src->u_buffer, src->v_buffer};
- const int src_strides[3] = {src->y_stride, src->uv_stride, src->uv_stride};
- const int src_widths[3] = {src->y_width, src->uv_width, src->uv_width};
- const int src_heights[3] = {src->y_height, src->uv_height, src->uv_height};
-
- uint8_t *const dsts[3] = {dst->y_buffer, dst->u_buffer, dst->v_buffer};
- const int dst_strides[3] = {dst->y_stride, dst->uv_stride, dst->uv_stride};
-
- for (i = 0; i < MAX_MB_PLANE; ++i) {
-#if CONFIG_VP9_HIGHBITDEPTH
- assert((src->flags & YV12_FLAG_HIGHBITDEPTH) ==
- (dst->flags & YV12_FLAG_HIGHBITDEPTH));
- if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
- vp10_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(srcs[i]),
- CONVERT_TO_SHORTPTR(dsts[i]),
- src_strides[i], dst_strides[i],
- src_heights[i], src_widths[i], ppl);
- } else {
- vp10_post_proc_down_and_across(srcs[i], dsts[i],
- src_strides[i], dst_strides[i],
- src_heights[i], src_widths[i], ppl);
- }
-#else
- vp10_post_proc_down_and_across(srcs[i], dsts[i],
- src_strides[i], dst_strides[i],
- src_heights[i], src_widths[i], ppl);
-#endif // CONFIG_VP9_HIGHBITDEPTH
- }
-}
-
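The ppl value above (also used by vp10_denoise() below and deblock_and_de_macro_block() earlier) comes from the same cubic in q; a small hedged sketch evaluating it for a few quantizer values (deblock_limit is an illustrative name, not a function in the source):

```c
#include <stdio.h>

/* Same polynomial used by vp10_deblock()/vp10_denoise() to map a quantizer
 * value to a post-proc filter limit, rounded to the nearest integer. */
static int deblock_limit(int q) {
  return (int)(6.0e-05 * q * q * q - 0.0067 * q * q + 0.306 * q + 0.0065 + 0.5);
}

int main(void) {
  int q;
  /* Over the range used by vp10_post_proc_frame (q capped at 105), the limit
   * grows from 0 to roughly 28: stronger smoothing at coarser quantizers. */
  for (q = 0; q <= 105; q += 21)
    printf("q=%3d  limit=%d\n", q, deblock_limit(q));
  return 0;
}
```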
-void vp10_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
- int q) {
- const int ppl = (int)(6.0e-05 * q * q * q - 0.0067 * q * q + 0.306 * q
- + 0.0065 + 0.5);
- int i;
-
- const uint8_t *const srcs[3] = {src->y_buffer, src->u_buffer, src->v_buffer};
- const int src_strides[3] = {src->y_stride, src->uv_stride, src->uv_stride};
- const int src_widths[3] = {src->y_width, src->uv_width, src->uv_width};
- const int src_heights[3] = {src->y_height, src->uv_height, src->uv_height};
-
- uint8_t *const dsts[3] = {dst->y_buffer, dst->u_buffer, dst->v_buffer};
- const int dst_strides[3] = {dst->y_stride, dst->uv_stride, dst->uv_stride};
-
- for (i = 0; i < MAX_MB_PLANE; ++i) {
- const int src_stride = src_strides[i];
- const int src_width = src_widths[i] - 4;
- const int src_height = src_heights[i] - 4;
- const int dst_stride = dst_strides[i];
-
-#if CONFIG_VP9_HIGHBITDEPTH
- assert((src->flags & YV12_FLAG_HIGHBITDEPTH) ==
- (dst->flags & YV12_FLAG_HIGHBITDEPTH));
- if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
- const uint16_t *const src_plane = CONVERT_TO_SHORTPTR(
- srcs[i] + 2 * src_stride + 2);
- uint16_t *const dst_plane = CONVERT_TO_SHORTPTR(
- dsts[i] + 2 * dst_stride + 2);
- vp10_highbd_post_proc_down_and_across(src_plane, dst_plane, src_stride,
- dst_stride, src_height, src_width,
- ppl);
- } else {
- const uint8_t *const src_plane = srcs[i] + 2 * src_stride + 2;
- uint8_t *const dst_plane = dsts[i] + 2 * dst_stride + 2;
-
- vp10_post_proc_down_and_across(src_plane, dst_plane, src_stride,
- dst_stride, src_height, src_width, ppl);
- }
-#else
- const uint8_t *const src_plane = srcs[i] + 2 * src_stride + 2;
- uint8_t *const dst_plane = dsts[i] + 2 * dst_stride + 2;
- vp10_post_proc_down_and_across(src_plane, dst_plane, src_stride, dst_stride,
- src_height, src_width, ppl);
-#endif
- }
-}
-
-static double gaussian(double sigma, double mu, double x) {
- return 1 / (sigma * sqrt(2.0 * 3.14159265)) *
- (exp(-(x - mu) * (x - mu) / (2 * sigma * sigma)));
-}
-
-static void fillrd(struct postproc_state *state, int q, int a) {
- char char_dist[300];
-
- double sigma;
- int ai = a, qi = q, i;
-
- vpx_clear_system_state();
-
- sigma = ai + .5 + .6 * (63 - qi) / 63.0;
-
- /* set up a lookup table of 256 entries that matches
- * a gaussian distribution with sigma determined by q.
- */
- {
- int next, j;
-
- next = 0;
-
- for (i = -32; i < 32; i++) {
- int a_i = (int)(0.5 + 256 * gaussian(sigma, 0, i));
-
- if (a_i) {
- for (j = 0; j < a_i; j++) {
- char_dist[next + j] = (char) i;
- }
-
- next = next + j;
- }
- }
-
- for (; next < 256; next++)
- char_dist[next] = 0;
- }
-
- for (i = 0; i < 3072; i++) {
- state->noise[i] = char_dist[rand() & 0xff]; // NOLINT
- }
-
- for (i = 0; i < 16; i++) {
- state->blackclamp[i] = -char_dist[0];
- state->whiteclamp[i] = -char_dist[0];
- state->bothclamp[i] = -2 * char_dist[0];
- }
-
- state->last_q = q;
- state->last_noise = a;
-}
-
-static void swap_mi_and_prev_mi(VP10_COMMON *cm) {
- // Current mip will be the prev_mip for the next frame.
- MODE_INFO *temp = cm->postproc_state.prev_mip;
- cm->postproc_state.prev_mip = cm->mip;
- cm->mip = temp;
-
- // Update the upper left visible macroblock ptrs.
- cm->mi = cm->mip + cm->mi_stride + 1;
- cm->postproc_state.prev_mi = cm->postproc_state.prev_mip + cm->mi_stride + 1;
-}
-
-int vp10_post_proc_frame(struct VP10Common *cm,
- YV12_BUFFER_CONFIG *dest, vp10_ppflags_t *ppflags) {
- const int q = VPXMIN(105, cm->lf.filter_level * 2);
- const int flags = ppflags->post_proc_flag;
- YV12_BUFFER_CONFIG *const ppbuf = &cm->post_proc_buffer;
- struct postproc_state *const ppstate = &cm->postproc_state;
-
- if (!cm->frame_to_show)
- return -1;
-
- if (!flags) {
- *dest = *cm->frame_to_show;
- return 0;
- }
-
- vpx_clear_system_state();
-
- // Alloc memory for prev_mip in the first frame.
- if (cm->current_video_frame == 1) {
- cm->postproc_state.last_base_qindex = cm->base_qindex;
- cm->postproc_state.last_frame_valid = 1;
- ppstate->prev_mip = vpx_calloc(cm->mi_alloc_size, sizeof(*cm->mip));
- if (!ppstate->prev_mip) {
- return 1;
- }
- ppstate->prev_mi = ppstate->prev_mip + cm->mi_stride + 1;
- memset(ppstate->prev_mip, 0,
- cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip));
- }
-
- // Allocate post_proc_buffer_int if needed.
- if ((flags & VP9D_MFQE) && !cm->post_proc_buffer_int.buffer_alloc) {
- if ((flags & VP9D_DEMACROBLOCK) || (flags & VP9D_DEBLOCK)) {
- const int width = ALIGN_POWER_OF_TWO(cm->width, 4);
- const int height = ALIGN_POWER_OF_TWO(cm->height, 4);
-
- if (vpx_alloc_frame_buffer(&cm->post_proc_buffer_int, width, height,
- cm->subsampling_x, cm->subsampling_y,
-#if CONFIG_VP9_HIGHBITDEPTH
- cm->use_highbitdepth,
-#endif // CONFIG_VP9_HIGHBITDEPTH
- VPX_ENC_BORDER_IN_PIXELS,
- cm->byte_alignment) < 0) {
- vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
- "Failed to allocate MFQE framebuffer");
- }
-
- // Initialize the postproc buffer (memset to 128, mid-gray) so that
- // post proc doesn't pull random data in from the edge.
- memset(cm->post_proc_buffer_int.buffer_alloc, 128,
- cm->post_proc_buffer.frame_size);
- }
- }
-
- if (vpx_realloc_frame_buffer(&cm->post_proc_buffer, cm->width, cm->height,
- cm->subsampling_x, cm->subsampling_y,
-#if CONFIG_VP9_HIGHBITDEPTH
- cm->use_highbitdepth,
-#endif
- VPX_DEC_BORDER_IN_PIXELS, cm->byte_alignment,
- NULL, NULL, NULL) < 0)
- vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
- "Failed to allocate post-processing buffer");
-
- if ((flags & VP9D_MFQE) && cm->current_video_frame >= 2 &&
- cm->postproc_state.last_frame_valid && cm->bit_depth == 8 &&
- cm->postproc_state.last_base_qindex <= last_q_thresh &&
- cm->base_qindex - cm->postproc_state.last_base_qindex >= q_diff_thresh) {
- vp10_mfqe(cm);
- // TODO(jackychen): Consider whether to enable deblocking by default
- // when mfqe is enabled; both the quality and the speed need to be
- // taken into consideration.
- if ((flags & VP9D_DEMACROBLOCK) || (flags & VP9D_DEBLOCK)) {
- vp8_yv12_copy_frame(ppbuf, &cm->post_proc_buffer_int);
- }
- if ((flags & VP9D_DEMACROBLOCK) && cm->post_proc_buffer_int.buffer_alloc) {
- deblock_and_de_macro_block(&cm->post_proc_buffer_int, ppbuf,
- q + (ppflags->deblocking_level - 5) * 10,
- 1, 0);
- } else if (flags & VP9D_DEBLOCK) {
- vp10_deblock(&cm->post_proc_buffer_int, ppbuf, q);
- } else {
- vp8_yv12_copy_frame(&cm->post_proc_buffer_int, ppbuf);
- }
- } else if (flags & VP9D_DEMACROBLOCK) {
- deblock_and_de_macro_block(cm->frame_to_show, ppbuf,
- q + (ppflags->deblocking_level - 5) * 10, 1, 0);
- } else if (flags & VP9D_DEBLOCK) {
- vp10_deblock(cm->frame_to_show, ppbuf, q);
- } else {
- vp8_yv12_copy_frame(cm->frame_to_show, ppbuf);
- }
-
- cm->postproc_state.last_base_qindex = cm->base_qindex;
- cm->postproc_state.last_frame_valid = 1;
-
- if (flags & VP9D_ADDNOISE) {
- const int noise_level = ppflags->noise_level;
- if (ppstate->last_q != q ||
- ppstate->last_noise != noise_level) {
- fillrd(ppstate, 63 - q, noise_level);
- }
-
- vpx_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
- ppstate->whiteclamp, ppstate->bothclamp,
- ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride);
- }
-
- *dest = *ppbuf;
-
- /* handle problem with extending borders */
- dest->y_width = cm->width;
- dest->y_height = cm->height;
- dest->uv_width = dest->y_width >> cm->subsampling_x;
- dest->uv_height = dest->y_height >> cm->subsampling_y;
-
- swap_mi_and_prev_mi(cm);
- return 0;
-}
-#endif // CONFIG_VP9_POSTPROC
+++ /dev/null
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef VP10_COMMON_POSTPROC_H_
-#define VP10_COMMON_POSTPROC_H_
-
-#include "vpx_ports/mem.h"
-#include "vpx_scale/yv12config.h"
-#include "vp10/common/blockd.h"
-#include "vp10/common/mfqe.h"
-#include "vp10/common/ppflags.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct postproc_state {
- int last_q;
- int last_noise;
- char noise[3072];
- int last_base_qindex;
- int last_frame_valid;
- MODE_INFO *prev_mip;
- MODE_INFO *prev_mi;
- DECLARE_ALIGNED(16, char, blackclamp[16]);
- DECLARE_ALIGNED(16, char, whiteclamp[16]);
- DECLARE_ALIGNED(16, char, bothclamp[16]);
-};
-
-struct VP10Common;
-
-#define MFQE_PRECISION 4
-
-int vp10_post_proc_frame(struct VP10Common *cm,
- YV12_BUFFER_CONFIG *dest, vp10_ppflags_t *flags);
-
-void vp10_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q);
-
-void vp10_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // VP10_COMMON_POSTPROC_H_
+++ /dev/null
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP10_COMMON_PPFLAGS_H_
-#define VP10_COMMON_PPFLAGS_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-enum {
- VP9D_NOFILTERING = 0,
- VP9D_DEBLOCK = 1 << 0,
- VP9D_DEMACROBLOCK = 1 << 1,
- VP9D_ADDNOISE = 1 << 2,
- VP9D_DEBUG_TXT_FRAME_INFO = 1 << 3,
- VP9D_DEBUG_TXT_MBLK_MODES = 1 << 4,
- VP9D_DEBUG_TXT_DC_DIFF = 1 << 5,
- VP9D_DEBUG_TXT_RATE_INFO = 1 << 6,
- VP9D_DEBUG_DRAW_MV = 1 << 7,
- VP9D_DEBUG_CLR_BLK_MODES = 1 << 8,
- VP9D_DEBUG_CLR_FRM_REF_BLKS = 1 << 9,
- VP9D_MFQE = 1 << 10
-};
-
-typedef struct {
- int post_proc_flag;
- int deblocking_level;
- int noise_level;
-} vp10_ppflags_t;
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // VP10_COMMON_PPFLAGS_H_
$avx2_x86_64 = 'avx2';
}
-#
-# post proc
-#
-if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
-add_proto qw/void vp10_mbpost_proc_down/, "uint8_t *dst, int pitch, int rows, int cols, int flimit";
-specialize qw/vp10_mbpost_proc_down sse2/;
-$vp10_mbpost_proc_down_sse2=vp10_mbpost_proc_down_xmm;
-
-add_proto qw/void vp10_mbpost_proc_across_ip/, "uint8_t *src, int pitch, int rows, int cols, int flimit";
-specialize qw/vp10_mbpost_proc_across_ip sse2/;
-$vp10_mbpost_proc_across_ip_sse2=vp10_mbpost_proc_across_ip_xmm;
-
-add_proto qw/void vp10_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
-specialize qw/vp10_post_proc_down_and_across sse2/;
-$vp10_post_proc_down_and_across_sse2=vp10_post_proc_down_and_across_xmm;
-
-add_proto qw/void vp10_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
-specialize qw/vp10_filter_by_weight16x16 sse2 msa/;
-
-add_proto qw/void vp10_filter_by_weight8x8/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
-specialize qw/vp10_filter_by_weight8x8 sse2 msa/;
-}
-
#
# 10/12-tap convolution filters
#
add_proto qw/void vp10_highbd_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
specialize qw/vp10_highbd_convolve8_avg_vert/, "$sse2_x86_64";
- #
- # post proc
- #
- if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
- add_proto qw/void vp10_highbd_mbpost_proc_down/, "uint16_t *dst, int pitch, int rows, int cols, int flimit";
- specialize qw/vp10_highbd_mbpost_proc_down/;
-
- add_proto qw/void vp10_highbd_mbpost_proc_across_ip/, "uint16_t *src, int pitch, int rows, int cols, int flimit";
- specialize qw/vp10_highbd_mbpost_proc_across_ip/;
-
- add_proto qw/void vp10_highbd_post_proc_down_and_across/, "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
- specialize qw/vp10_highbd_post_proc_down_and_across/;
- }
-
#
# dct
#
# ENCODEMB INVOKE
-#
-# Denoiser
-#
-if (vpx_config("CONFIG_VP9_TEMPORAL_DENOISING") eq "yes") {
- add_proto qw/int vp10_denoiser_filter/, "const uint8_t *sig, int sig_stride, const uint8_t *mc_avg, int mc_avg_stride, uint8_t *avg, int avg_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude";
- specialize qw/vp10_denoiser_filter sse2/;
-}
-
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# the transform coefficients are held in 32-bit
# values, so the assembler code for vp10_block_error can no longer be used.
+++ /dev/null
-;
-; Copyright (c) 2015 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-; This file is a duplicate of mfqe_sse2.asm in VP8.
-; TODO(jackychen): Find a way to fix the duplicate.
-%include "vpx_ports/x86_abi_support.asm"
-
-;void vp10_filter_by_weight16x16_sse2
-;(
-; unsigned char *src,
-; int src_stride,
-; unsigned char *dst,
-; int dst_stride,
-; int src_weight
-;)
-global sym(vp10_filter_by_weight16x16_sse2) PRIVATE
-sym(vp10_filter_by_weight16x16_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- SAVE_XMM 6
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- movd xmm0, arg(4) ; src_weight
- pshuflw xmm0, xmm0, 0x0 ; replicate to all low words
- punpcklqdq xmm0, xmm0 ; replicate to all hi words
-
- movdqa xmm1, [GLOBAL(tMFQE)]
- psubw xmm1, xmm0 ; dst_weight
-
- mov rax, arg(0) ; src
- mov rsi, arg(1) ; src_stride
- mov rdx, arg(2) ; dst
- mov rdi, arg(3) ; dst_stride
-
- mov rcx, 16 ; loop count
- pxor xmm6, xmm6
-
-.combine
- movdqa xmm2, [rax]
- movdqa xmm4, [rdx]
- add rax, rsi
-
- ; src * src_weight
- movdqa xmm3, xmm2
- punpcklbw xmm2, xmm6
- punpckhbw xmm3, xmm6
- pmullw xmm2, xmm0
- pmullw xmm3, xmm0
-
- ; dst * dst_weight
- movdqa xmm5, xmm4
- punpcklbw xmm4, xmm6
- punpckhbw xmm5, xmm6
- pmullw xmm4, xmm1
- pmullw xmm5, xmm1
-
- ; sum, round and shift
- paddw xmm2, xmm4
- paddw xmm3, xmm5
- paddw xmm2, [GLOBAL(tMFQE_round)]
- paddw xmm3, [GLOBAL(tMFQE_round)]
- psrlw xmm2, 4
- psrlw xmm3, 4
-
- packuswb xmm2, xmm3
- movdqa [rdx], xmm2
- add rdx, rdi
-
- dec rcx
- jnz .combine
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
-
- ret
-
-;void vp10_filter_by_weight8x8_sse2
-;(
-; unsigned char *src,
-; int src_stride,
-; unsigned char *dst,
-; int dst_stride,
-; int src_weight
-;)
-global sym(vp10_filter_by_weight8x8_sse2) PRIVATE
-sym(vp10_filter_by_weight8x8_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- movd xmm0, arg(4) ; src_weight
- pshuflw xmm0, xmm0, 0x0 ; replicate to all low words
- punpcklqdq xmm0, xmm0 ; replicate to all hi words
-
- movdqa xmm1, [GLOBAL(tMFQE)]
- psubw xmm1, xmm0 ; dst_weight
-
- mov rax, arg(0) ; src
- mov rsi, arg(1) ; src_stride
- mov rdx, arg(2) ; dst
- mov rdi, arg(3) ; dst_stride
-
- mov rcx, 8 ; loop count
- pxor xmm4, xmm4
-
-.combine
- movq xmm2, [rax]
- movq xmm3, [rdx]
- add rax, rsi
-
- ; src * src_weight
- punpcklbw xmm2, xmm4
- pmullw xmm2, xmm0
-
- ; dst * dst_weight
- punpcklbw xmm3, xmm4
- pmullw xmm3, xmm1
-
- ; sum, round and shift
- paddw xmm2, xmm3
- paddw xmm2, [GLOBAL(tMFQE_round)]
- psrlw xmm2, 4
-
- packuswb xmm2, xmm4
- movq [rdx], xmm2
- add rdx, rdi
-
- dec rcx
- jnz .combine
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
-
- ret
-
-;void vp10_variance_and_sad_16x16_sse2 | arg
-;(
-; unsigned char *src1, 0
-; int stride1, 1
-; unsigned char *src2, 2
-; int stride2, 3
-; unsigned int *variance, 4
-; unsigned int *sad, 5
-;)
-global sym(vp10_variance_and_sad_16x16_sse2) PRIVATE
-sym(vp10_variance_and_sad_16x16_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rax, arg(0) ; src1
- mov rcx, arg(1) ; stride1
- mov rdx, arg(2) ; src2
- mov rdi, arg(3) ; stride2
-
- mov rsi, 16 ; block height
-
- ; Prep accumulator registers
- pxor xmm3, xmm3 ; SAD
- pxor xmm4, xmm4 ; sum of src2
- pxor xmm5, xmm5 ; sum of src2^2
-
- ; Because we're working with the actual output frames
- ; we can't depend on any kind of data alignment.
-.accumulate
- movdqa xmm0, [rax] ; src1
- movdqa xmm1, [rdx] ; src2
- add rax, rcx ; src1 + stride1
- add rdx, rdi ; src2 + stride2
-
- ; SAD(src1, src2)
- psadbw xmm0, xmm1
- paddusw xmm3, xmm0
-
- ; SUM(src2)
- pxor xmm2, xmm2
- psadbw xmm2, xmm1 ; sum src2 by misusing SAD against 0
- paddusw xmm4, xmm2
-
- ; pmaddubsw would be ideal if it took two unsigned values, but instead
- ; it expects one signed and one unsigned value. So we zero-extend
- ; and operate on words.
- pxor xmm2, xmm2
- movdqa xmm0, xmm1
- punpcklbw xmm0, xmm2
- punpckhbw xmm1, xmm2
- pmaddwd xmm0, xmm0
- pmaddwd xmm1, xmm1
- paddd xmm5, xmm0
- paddd xmm5, xmm1
-
- sub rsi, 1
- jnz .accumulate
-
- ; phaddd only operates on adjacent double words.
- ; Finalize SAD and store
- movdqa xmm0, xmm3
- psrldq xmm0, 8
- paddusw xmm0, xmm3
- paddd xmm0, [GLOBAL(t128)]
- psrld xmm0, 8
-
- mov rax, arg(5)
- movd [rax], xmm0
-
- ; Accumulate sum of src2
- movdqa xmm0, xmm4
- psrldq xmm0, 8
- paddusw xmm0, xmm4
- ; Square src2. Ignore high value
- pmuludq xmm0, xmm0
- psrld xmm0, 8
-
- ; phaddw could be used to sum adjacent values, but we want
- ; all the values summed. Promote to doubles, accumulate,
- ; shift and sum.
- pxor xmm2, xmm2
- movdqa xmm1, xmm5
- punpckldq xmm1, xmm2
- punpckhdq xmm5, xmm2
- paddd xmm1, xmm5
- movdqa xmm2, xmm1
- psrldq xmm1, 8
- paddd xmm1, xmm2
-
- psubd xmm1, xmm0
-
- ; (variance + 128) >> 8
- paddd xmm1, [GLOBAL(t128)]
- psrld xmm1, 8
- mov rax, arg(4)
-
- movd [rax], xmm1
-
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-SECTION_RODATA
-align 16
-t128:
-%ifndef __NASM_VER__
- ddq 128
-%elif CONFIG_BIG_ENDIAN
- dq 0, 128
-%else
- dq 128, 0
-%endif
-align 16
-tMFQE: ; 1 << MFQE_PRECISION
- times 8 dw 0x10
-align 16
-tMFQE_round: ; 1 << (MFQE_PRECISION - 1)
- times 8 dw 0x08
+++ /dev/null
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-;void vp10_post_proc_down_and_across_xmm
-;(
-; unsigned char *src_ptr,
-; unsigned char *dst_ptr,
-; int src_pixels_per_line,
-; int dst_pixels_per_line,
-; int rows,
-; int cols,
-; int flimit
-;)
-global sym(vp10_post_proc_down_and_across_xmm) PRIVATE
-sym(vp10_post_proc_down_and_across_xmm):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
-%if ABI_IS_32BIT=1 && CONFIG_PIC=1
- ALIGN_STACK 16, rax
- ; move the global rd onto the stack, since we don't have enough registers
- ; to do PIC addressing
- movdqa xmm0, [GLOBAL(rd42)]
- sub rsp, 16
- movdqa [rsp], xmm0
-%define RD42 [rsp]
-%else
-%define RD42 [GLOBAL(rd42)]
-%endif
-
-
- movd xmm2, dword ptr arg(6) ;flimit
- punpcklwd xmm2, xmm2
- punpckldq xmm2, xmm2
- punpcklqdq xmm2, xmm2
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(1) ;dst_ptr
-
- movsxd rcx, DWORD PTR arg(4) ;rows
- movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line ; destination pitch?
- pxor xmm0, xmm0 ; mm0 = 00000000
-
-.nextrow:
-
- xor rdx, rdx ; clear out rdx for use as loop counter
-.nextcol:
- movq xmm3, QWORD PTR [rsi] ; mm4 = r0 p0..p7
- punpcklbw xmm3, xmm0 ; mm3 = p0..p3
- movdqa xmm1, xmm3 ; mm1 = p0..p3
- psllw xmm3, 2 ;
-
- movq xmm5, QWORD PTR [rsi + rax] ; mm4 = r1 p0..p7
- punpcklbw xmm5, xmm0 ; mm5 = r1 p0..p3
- paddusw xmm3, xmm5 ; mm3 += mm6
-
- ; thresholding
- movdqa xmm7, xmm1 ; mm7 = r0 p0..p3
- psubusw xmm7, xmm5 ; mm7 = r0 p0..p3 - r1 p0..p3
- psubusw xmm5, xmm1 ; mm5 = r1 p0..p3 - r0 p0..p3
- paddusw xmm7, xmm5 ; mm7 = abs(r0 p0..p3 - r1 p0..p3)
- pcmpgtw xmm7, xmm2
-
- movq xmm5, QWORD PTR [rsi + 2*rax] ; mm4 = r2 p0..p7
- punpcklbw xmm5, xmm0 ; mm5 = r2 p0..p3
- paddusw xmm3, xmm5 ; mm3 += mm5
-
- ; thresholding
- movdqa xmm6, xmm1 ; mm6 = r0 p0..p3
- psubusw xmm6, xmm5 ; mm6 = r0 p0..p3 - r2 p0..p3
- psubusw xmm5, xmm1 ; mm5 = r2 p0..p3 - r0 p0..p3
- paddusw xmm6, xmm5 ; mm6 = abs(r0 p0..p3 - r2 p0..p3)
- pcmpgtw xmm6, xmm2
- por xmm7, xmm6 ; accumulate thresholds
-
-
- neg rax
- movq xmm5, QWORD PTR [rsi+2*rax] ; mm4 = r-2 p0..p7
- punpcklbw xmm5, xmm0 ; mm5 = r-2 p0..p3
- paddusw xmm3, xmm5 ; mm3 += mm5
-
- ; thresholding
- movdqa xmm6, xmm1 ; mm6 = r0 p0..p3
- psubusw xmm6, xmm5 ; mm6 = p0..p3 - r-2 p0..p3
- psubusw xmm5, xmm1 ; mm5 = r-2 p0..p3 - p0..p3
- paddusw xmm6, xmm5 ; mm6 = abs(r0 p0..p3 - r-2 p0..p3)
- pcmpgtw xmm6, xmm2
- por xmm7, xmm6 ; accumulate thresholds
-
- movq xmm4, QWORD PTR [rsi+rax] ; mm4 = r-1 p0..p7
- punpcklbw xmm4, xmm0 ; mm4 = r-1 p0..p3
- paddusw xmm3, xmm4 ; mm3 += mm5
-
- ; thresholding
- movdqa xmm6, xmm1 ; mm6 = r0 p0..p3
- psubusw xmm6, xmm4 ; mm6 = p0..p3 - r-1 p0..p3
- psubusw xmm4, xmm1 ; mm4 = r-1 p0..p3 - p0..p3
- paddusw xmm6, xmm4 ; mm6 = abs(r0 p0..p3 - r-1 p0..p3)
- pcmpgtw xmm6, xmm2
- por xmm7, xmm6 ; accumulate thresholds
-
-
- paddusw xmm3, RD42 ; mm3 += round value
- psraw xmm3, 3 ; mm3 /= 8
-
- pand xmm1, xmm7 ; mm1 select vals > thresh from source
- pandn xmm7, xmm3 ; mm7 select vals < thresh from blurred result
- paddusw xmm1, xmm7 ; combination
-
- packuswb xmm1, xmm0 ; pack to bytes
- movq QWORD PTR [rdi], xmm1 ;
-
- neg rax ; pitch is positive
- add rsi, 8
- add rdi, 8
-
- add rdx, 8
- cmp edx, dword arg(5) ;cols
-
- jl .nextcol
-
- ; done with the all cols, start the across filtering in place
- sub rsi, rdx
- sub rdi, rdx
-
- xor rdx, rdx
- movq mm0, QWORD PTR [rdi-8];
-
-.acrossnextcol:
- movq xmm7, QWORD PTR [rdi +rdx -2]
- movd xmm4, DWORD PTR [rdi +rdx +6]
-
- pslldq xmm4, 8
- por xmm4, xmm7
-
- movdqa xmm3, xmm4
- psrldq xmm3, 2
- punpcklbw xmm3, xmm0 ; mm3 = p0..p3
- movdqa xmm1, xmm3 ; mm1 = p0..p3
- psllw xmm3, 2
-
-
- movdqa xmm5, xmm4
- psrldq xmm5, 3
- punpcklbw xmm5, xmm0 ; mm5 = p1..p4
- paddusw xmm3, xmm5 ; mm3 += mm6
-
- ; thresholding
- movdqa xmm7, xmm1 ; mm7 = p0..p3
- psubusw xmm7, xmm5 ; mm7 = p0..p3 - p1..p4
- psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3
- paddusw xmm7, xmm5 ; mm7 = abs(p0..p3 - p1..p4)
- pcmpgtw xmm7, xmm2
-
- movdqa xmm5, xmm4
- psrldq xmm5, 4
- punpcklbw xmm5, xmm0 ; mm5 = p2..p5
- paddusw xmm3, xmm5 ; mm3 += mm5
-
- ; thresholding
- movdqa xmm6, xmm1 ; mm6 = p0..p3
- psubusw xmm6, xmm5 ; mm6 = p0..p3 - p1..p4
- psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3
- paddusw xmm6, xmm5 ; mm6 = abs(p0..p3 - p1..p4)
- pcmpgtw xmm6, xmm2
- por xmm7, xmm6 ; accumulate thresholds
-
-
- movdqa xmm5, xmm4 ; mm5 = p-2..p5
- punpcklbw xmm5, xmm0 ; mm5 = p-2..p1
- paddusw xmm3, xmm5 ; mm3 += mm5
-
- ; thresholding
- movdqa xmm6, xmm1 ; mm6 = p0..p3
- psubusw xmm6, xmm5 ; mm6 = p0..p3 - p1..p4
- psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3
- paddusw xmm6, xmm5 ; mm6 = abs(p0..p3 - p1..p4)
- pcmpgtw xmm6, xmm2
- por xmm7, xmm6 ; accumulate thresholds
-
- psrldq xmm4, 1 ; mm4 = p-1..p5
- punpcklbw xmm4, xmm0 ; mm4 = p-1..p2
- paddusw xmm3, xmm4 ; mm3 += mm5
-
- ; thresholding
- movdqa xmm6, xmm1 ; mm6 = p0..p3
- psubusw xmm6, xmm4 ; mm6 = p0..p3 - p1..p4
- psubusw xmm4, xmm1 ; mm5 = p1..p4 - p0..p3
- paddusw xmm6, xmm4 ; mm6 = abs(p0..p3 - p1..p4)
- pcmpgtw xmm6, xmm2
- por xmm7, xmm6 ; accumulate thresholds
-
- paddusw xmm3, RD42 ; mm3 += round value
- psraw xmm3, 3 ; mm3 /= 8
-
- pand xmm1, xmm7 ; mm1 select vals > thresh from source
- pandn xmm7, xmm3 ; mm7 select vals < thresh from blurred result
- paddusw xmm1, xmm7 ; combination
-
- packuswb xmm1, xmm0 ; pack to bytes
- movq QWORD PTR [rdi+rdx-8], mm0 ; store previous four bytes
- movdq2q mm0, xmm1
-
- add rdx, 8
- cmp edx, dword arg(5) ;cols
- jl .acrossnextcol;
-
- ; last 8 pixels
- movq QWORD PTR [rdi+rdx-8], mm0
-
- ; done with this row
- add rsi,rax ; next line
- mov eax, dword arg(3) ;dst_pixels_per_line ; destination pitch?
- add rdi,rax ; next destination
- mov eax, dword arg(2) ;src_pixels_per_line ; destination pitch?
-
- dec rcx ; decrement count
- jnz .nextrow ; next row
-
-%if ABI_IS_32BIT=1 && CONFIG_PIC=1
- add rsp,16
- pop rsp
-%endif
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-%undef RD42
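As a rough scalar reference for the vertical (down) pass above — a sketch under the assumption that the SSE2 code selects, per pixel, between the source value and a [1 1 4 1 1]/8 column blur gated by flimit; the helper name below is illustrative and not from this tree:

static unsigned char down_filter_ref(const unsigned char *p, int pitch,
                                     int flimit) {
  const int c = p[0];
  int sum = 4 * c + 4;  /* centre weight 4 plus the rd42 rounding constant */
  int i;
  for (i = 0; i < 4; ++i) {
    static const int offs[4] = { -2, -1, 1, 2 };
    const int tap = p[offs[i] * pitch];
    const int d = c > tap ? c - tap : tap - c;
    if (d > flimit) return (unsigned char)c;  /* any tap over threshold: keep source */
    sum += tap;
  }
  return (unsigned char)(sum >> 3);  /* the five weights sum to 8 */
}

The across pass later in the function applies the same kernel horizontally, working in place on the row it has just written.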
-
-
-;void vp10_mbpost_proc_down_xmm(unsigned char *dst,
-; int pitch, int rows, int cols,int flimit)
-extern sym(vp10_rv)
-global sym(vp10_mbpost_proc_down_xmm) PRIVATE
-sym(vp10_mbpost_proc_down_xmm):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 128+16
-
- ; unsigned char d[16][8] at [rsp]
- ; create flimit2 at [rsp+128]
- mov eax, dword ptr arg(4) ;flimit
- mov [rsp+128], eax
- mov [rsp+128+4], eax
- mov [rsp+128+8], eax
- mov [rsp+128+12], eax
-%define flimit4 [rsp+128]
-
-%if ABI_IS_32BIT=0
- lea r8, [GLOBAL(sym(vp10_rv))]
-%endif
-
- ;rows +=8;
- add dword arg(2), 8
-
- ;for(c=0; c<cols; c+=8)
-.loop_col:
- mov rsi, arg(0) ; s
- pxor xmm0, xmm0 ;
-
- movsxd rax, dword ptr arg(1) ;pitch ;
- neg rax ; rax = -pitch
-
- lea rsi, [rsi + rax*8]; ; rdi = s[-pitch*8]
- neg rax
-
-
- pxor xmm5, xmm5
- pxor xmm6, xmm6 ;
-
- pxor xmm7, xmm7 ;
- mov rdi, rsi
-
- mov rcx, 15 ;
-
-.loop_initvar:
- movq xmm1, QWORD PTR [rdi];
- punpcklbw xmm1, xmm0 ;
-
- paddw xmm5, xmm1 ;
- pmullw xmm1, xmm1 ;
-
- movdqa xmm2, xmm1 ;
- punpcklwd xmm1, xmm0 ;
-
- punpckhwd xmm2, xmm0 ;
- paddd xmm6, xmm1 ;
-
- paddd xmm7, xmm2 ;
- lea rdi, [rdi+rax] ;
-
- dec rcx
- jne .loop_initvar
- ;save the var and sum
- xor rdx, rdx
-.loop_row:
- movq xmm1, QWORD PTR [rsi] ; [s-pitch*8]
- movq xmm2, QWORD PTR [rdi] ; [s+pitch*7]
-
- punpcklbw xmm1, xmm0
- punpcklbw xmm2, xmm0
-
- paddw xmm5, xmm2
- psubw xmm5, xmm1
-
- pmullw xmm2, xmm2
- movdqa xmm4, xmm2
-
- punpcklwd xmm2, xmm0
- punpckhwd xmm4, xmm0
-
- paddd xmm6, xmm2
- paddd xmm7, xmm4
-
- pmullw xmm1, xmm1
- movdqa xmm2, xmm1
-
- punpcklwd xmm1, xmm0
- psubd xmm6, xmm1
-
- punpckhwd xmm2, xmm0
- psubd xmm7, xmm2
-
-
- movdqa xmm3, xmm6
- pslld xmm3, 4
-
- psubd xmm3, xmm6
- movdqa xmm1, xmm5
-
- movdqa xmm4, xmm5
- pmullw xmm1, xmm1
-
- pmulhw xmm4, xmm4
- movdqa xmm2, xmm1
-
- punpcklwd xmm1, xmm4
- punpckhwd xmm2, xmm4
-
- movdqa xmm4, xmm7
- pslld xmm4, 4
-
- psubd xmm4, xmm7
-
- psubd xmm3, xmm1
- psubd xmm4, xmm2
-
- psubd xmm3, flimit4
- psubd xmm4, flimit4
-
- psrad xmm3, 31
- psrad xmm4, 31
-
- packssdw xmm3, xmm4
- packsswb xmm3, xmm0
-
- movq xmm1, QWORD PTR [rsi+rax*8]
-
- movq xmm2, xmm1
- punpcklbw xmm1, xmm0
-
- paddw xmm1, xmm5
- mov rcx, rdx
-
- and rcx, 127
-%if ABI_IS_32BIT=1 && CONFIG_PIC=1
- push rax
- lea rax, [GLOBAL(sym(vp10_rv))]
- movdqu xmm4, [rax + rcx*2] ;vp10_rv[rcx*2]
- pop rax
-%elif ABI_IS_32BIT=0
- movdqu xmm4, [r8 + rcx*2] ;vp10_rv[rcx*2]
-%else
- movdqu xmm4, [sym(vp10_rv) + rcx*2]
-%endif
-
- paddw xmm1, xmm4
- ;paddw xmm1, eight8s
- psraw xmm1, 4
-
- packuswb xmm1, xmm0
- pand xmm1, xmm3
-
- pandn xmm3, xmm2
- por xmm1, xmm3
-
- and rcx, 15
- movq QWORD PTR [rsp + rcx*8], xmm1 ;d[rcx*8]
-
- mov rcx, rdx
- sub rcx, 8
-
- and rcx, 15
- movq mm0, [rsp + rcx*8] ;d[rcx*8]
-
- movq [rsi], mm0
- lea rsi, [rsi+rax]
-
- lea rdi, [rdi+rax]
- add rdx, 1
-
- cmp edx, dword arg(2) ;rows
- jl .loop_row
-
- add dword arg(0), 8 ; s += 8
- sub dword arg(3), 8 ; cols -= 8
- cmp dword arg(3), 0
- jg .loop_col
-
- add rsp, 128+16
- pop rsp
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-%undef flimit4
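A scalar sketch of the gate the column filter above builds with the pslld/psubd sequence, assuming sum and sumsq are the sliding vertical sums the loop maintains; the helper is illustrative, not code from this tree:

static int mbpost_down_filter_ok(int sum, int sumsq, int flimit) {
  /* 15 * sumsq - sum * sum acts as a scaled variance estimate for the
   * sliding window of rows; the blur is applied only while it stays
   * below flimit. */
  return sumsq * 15 - sum * sum < flimit;
}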
-
-
-;void vp10_mbpost_proc_across_ip_xmm(unsigned char *src,
-; int pitch, int rows, int cols,int flimit)
-global sym(vp10_mbpost_proc_across_ip_xmm) PRIVATE
-sym(vp10_mbpost_proc_across_ip_xmm):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16
-
- ; create flimit4 at [rsp]
- mov eax, dword ptr arg(4) ;flimit
- mov [rsp], eax
- mov [rsp+4], eax
- mov [rsp+8], eax
- mov [rsp+12], eax
-%define flimit4 [rsp]
-
-
- ;for(r=0;r<rows;r++)
-.ip_row_loop:
-
- xor rdx, rdx ;sumsq=0;
- xor rcx, rcx ;sum=0;
- mov rsi, arg(0); s
- mov rdi, -8
-.ip_var_loop:
- ;for(i=-8;i<=6;i++)
- ;{
- ; sumsq += s[i]*s[i];
- ; sum += s[i];
- ;}
- movzx eax, byte [rsi+rdi]
- add ecx, eax
- mul al
- add edx, eax
- add rdi, 1
- cmp rdi, 6
- jle .ip_var_loop
-
-
- ;mov rax, sumsq
- ;movd xmm7, rax
- movd xmm7, edx
-
- ;mov rax, sum
- ;movd xmm6, rax
- movd xmm6, ecx
-
- mov rsi, arg(0) ;s
- xor rcx, rcx
-
- movsxd rdx, dword arg(3) ;cols
- add rdx, 8
- pxor mm0, mm0
- pxor mm1, mm1
-
- pxor xmm0, xmm0
-.nextcol4:
-
- movd xmm1, DWORD PTR [rsi+rcx-8] ; -8 -7 -6 -5
- movd xmm2, DWORD PTR [rsi+rcx+7] ; +7 +8 +9 +10
-
- punpcklbw xmm1, xmm0 ; expanding
- punpcklbw xmm2, xmm0 ; expanding
-
- punpcklwd xmm1, xmm0 ; expanding to dwords
- punpcklwd xmm2, xmm0 ; expanding to dwords
-
- psubd xmm2, xmm1 ; 7--8 8--7 9--6 10--5
- paddd xmm1, xmm1 ; -8*2 -7*2 -6*2 -5*2
-
- paddd xmm1, xmm2 ; 7+-8 8+-7 9+-6 10+-5
- pmaddwd xmm1, xmm2 ; squared of 7+-8 8+-7 9+-6 10+-5
-
- paddd xmm6, xmm2
- paddd xmm7, xmm1
-
- pshufd xmm6, xmm6, 0 ; duplicate the last ones
- pshufd xmm7, xmm7, 0 ; duplicate the last ones
-
- psrldq xmm1, 4 ; 8--7 9--6 10--5 0000
- psrldq xmm2, 4 ; 8--7 9--6 10--5 0000
-
- pshufd xmm3, xmm1, 3 ; 0000 8--7 8--7 8--7 squared
- pshufd xmm4, xmm2, 3 ; 0000 8--7 8--7 8--7 squared
-
- paddd xmm6, xmm4
- paddd xmm7, xmm3
-
- pshufd xmm3, xmm1, 01011111b ; 0000 0000 9--6 9--6 squared
- pshufd xmm4, xmm2, 01011111b ; 0000 0000 9--6 9--6 squared
-
- paddd xmm7, xmm3
- paddd xmm6, xmm4
-
- pshufd xmm3, xmm1, 10111111b ; 0000 0000 8--7 8--7 squared
- pshufd xmm4, xmm2, 10111111b ; 0000 0000 8--7 8--7 squared
-
- paddd xmm7, xmm3
- paddd xmm6, xmm4
-
- movdqa xmm3, xmm6
- pmaddwd xmm3, xmm3
-
- movdqa xmm5, xmm7
- pslld xmm5, 4
-
- psubd xmm5, xmm7
- psubd xmm5, xmm3
-
- psubd xmm5, flimit4
- psrad xmm5, 31
-
- packssdw xmm5, xmm0
- packsswb xmm5, xmm0
-
- movd xmm1, DWORD PTR [rsi+rcx]
- movq xmm2, xmm1
-
- punpcklbw xmm1, xmm0
- punpcklwd xmm1, xmm0
-
- paddd xmm1, xmm6
- paddd xmm1, [GLOBAL(four8s)]
-
- psrad xmm1, 4
- packssdw xmm1, xmm0
-
- packuswb xmm1, xmm0
- pand xmm1, xmm5
-
- pandn xmm5, xmm2
- por xmm5, xmm1
-
- movd [rsi+rcx-8], mm0
- movq mm0, mm1
-
- movdq2q mm1, xmm5
- psrldq xmm7, 12
-
- psrldq xmm6, 12
- add rcx, 4
-
- cmp rcx, rdx
- jl .nextcol4
-
- ;s+=pitch;
- movsxd rax, dword arg(1)
- add arg(0), rax
-
- sub dword arg(2), 1 ;rows-=1
- cmp dword arg(2), 0
- jg .ip_row_loop
-
- add rsp, 16
- pop rsp
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-%undef flimit4
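The shuffle-heavy block above keeps per-column running sums for the horizontal pass. As a scalar sketch (assuming a 16-pixel sliding window from column c-8 to c+7, with the same 15*sumsq - sum*sum gate as the vertical pass), the per-column update amounts to the illustrative helper below, which is not code from this tree:

static void across_window_update(const unsigned char *s, int c,
                                 int *sum, int *sumsq) {
  *sum += s[c + 7] - s[c - 8];
  *sumsq += s[c + 7] * s[c + 7] - s[c - 8] * s[c - 8];
}

The paired load/subtract/multiply-add in the assembly computes these two increments for four columns at a time, which is why the squares never have to be formed explicitly.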
-
-
-SECTION_RODATA
-align 16
-rd42:
- times 8 dw 0x04
-four8s:
- times 4 dd 8
#include "vp10/common/alloccommon.h"
#include "vp10/common/loopfilter.h"
#include "vp10/common/onyxc_int.h"
-#if CONFIG_VP9_POSTPROC
-#include "vp10/common/postproc.h"
-#endif
#include "vp10/common/quant_common.h"
#include "vp10/common/reconinter.h"
#include "vp10/common/reconintra.h"
return retcode;
}
-int vp10_get_raw_frame(VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd,
- vp10_ppflags_t *flags) {
+int vp10_get_raw_frame(VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd) {
VP10_COMMON *const cm = &pbi->common;
int ret = -1;
-#if !CONFIG_VP9_POSTPROC
- (void)*flags;
-#endif
-
if (pbi->ready_for_new_data == 1)
return ret;
return ret;
pbi->ready_for_new_data = 1;
-
-#if CONFIG_VP9_POSTPROC
- if (!cm->show_existing_frame) {
- ret = vp10_post_proc_frame(cm, sd, flags);
- } else {
- *sd = *cm->frame_to_show;
- ret = 0;
- }
-#else
*sd = *cm->frame_to_show;
ret = 0;
-#endif /*!CONFIG_POSTPROC*/
vpx_clear_system_state();
return ret;
}
#include "vp10/common/thread_common.h"
#include "vp10/common/onyxc_int.h"
-#include "vp10/common/ppflags.h"
#include "vp10/decoder/dthread.h"
#ifdef __cplusplus
int vp10_receive_compressed_data(struct VP10Decoder *pbi,
size_t size, const uint8_t **dest);
-int vp10_get_raw_frame(struct VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd,
- vp10_ppflags_t *flags);
+int vp10_get_raw_frame(struct VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd);
int vp10_get_frame_to_show(struct VP10Decoder *pbi, YV12_BUFFER_CONFIG *frame);
int rate;
int64_t dist;
-#if CONFIG_VP9_TEMPORAL_DENOISING
- unsigned int newmv_sse;
- unsigned int zeromv_sse;
- PREDICTION_MODE best_sse_inter_mode;
- int_mv best_sse_mv;
- MV_REFERENCE_FRAME best_reference_frame;
- MV_REFERENCE_FRAME best_zeromv_reference_frame;
-#endif
-
// motion vector cache for adaptive motion search control in partition
// search loop
MV pred_mv[MAX_REF_FRAMES];
+++ /dev/null
-/*
- * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <assert.h>
-#include <limits.h>
-#include "./vpx_dsp_rtcd.h"
-#include "vpx_dsp/vpx_dsp_common.h"
-#include "vpx_scale/yv12config.h"
-#include "vpx/vpx_integer.h"
-#include "vp10/common/reconinter.h"
-#include "vp10/encoder/context_tree.h"
-#include "vp10/encoder/denoiser.h"
-
-/* The VP9 denoiser is a work-in-progress. It currently is only designed to work
- * with speed 6, though it (inexplicably) seems to also work with speed 5 (one
- * would need to modify the source code in vp10_pickmode.c and vp10_encoder.c to
- * make the calls to the vp10_denoiser_* functions when in speed 5).
- *
- * The implementation is very similar to that of the VP8 denoiser. While
- * choosing the motion vectors / reference frames, the denoiser is run, and if
- * it did not modify the signal too much, the denoised block is copied to the
- * signal.
- */
-
-#ifdef OUTPUT_YUV_DENOISED
-static void make_grayscale(YV12_BUFFER_CONFIG *yuv);
-#endif
-
-static int absdiff_thresh(BLOCK_SIZE bs, int increase_denoising) {
- (void)bs;
- return 3 + (increase_denoising ? 1 : 0);
-}
-
-static int delta_thresh(BLOCK_SIZE bs, int increase_denoising) {
- (void)bs;
- (void)increase_denoising;
- return 4;
-}
-
-static int noise_motion_thresh(BLOCK_SIZE bs, int increase_denoising) {
- (void)bs;
- (void)increase_denoising;
- return 625;
-}
-
-static unsigned int sse_thresh(BLOCK_SIZE bs, int increase_denoising) {
- return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 60 : 40);
-}
-
-static int sse_diff_thresh(BLOCK_SIZE bs, int increase_denoising,
- int motion_magnitude) {
- if (motion_magnitude >
- noise_motion_thresh(bs, increase_denoising)) {
- return 0;
- } else {
- return (1 << num_pels_log2_lookup[bs]) * 20;
- }
-}
-
-int total_adj_strong_thresh(BLOCK_SIZE bs, int increase_denoising) {
- return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 3 : 2);
-}
-
-static int total_adj_weak_thresh(BLOCK_SIZE bs, int increase_denoising) {
- return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 3 : 2);
-}
-
-// TODO(jackychen): If increase_denoising is enabled in the future,
-// we might need to update the code for calculating 'total_adj' in
-// case the C code is not bit-exact with corresponding sse2 code.
-int vp10_denoiser_filter_c(const uint8_t *sig, int sig_stride,
- const uint8_t *mc_avg,
- int mc_avg_stride,
- uint8_t *avg, int avg_stride,
- int increase_denoising,
- BLOCK_SIZE bs,
- int motion_magnitude) {
- int r, c;
- const uint8_t *sig_start = sig;
- const uint8_t *mc_avg_start = mc_avg;
- uint8_t *avg_start = avg;
- int diff, adj, absdiff, delta;
- int adj_val[] = {3, 4, 6};
- int total_adj = 0;
- int shift_inc = 1;
-
- // If motion_magnitude is small, make the denoiser more aggressive by
- // increasing the adjustment for each level. Add another increment for
- // blocks that are labeled for increased denoising.
- if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) {
- if (increase_denoising) {
- shift_inc = 2;
- }
- adj_val[0] += shift_inc;
- adj_val[1] += shift_inc;
- adj_val[2] += shift_inc;
- }
-
- // First attempt to apply a strong temporal denoising filter.
- for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) {
- for (c = 0; c < (4 << b_width_log2_lookup[bs]); ++c) {
- diff = mc_avg[c] - sig[c];
- absdiff = abs(diff);
-
- if (absdiff <= absdiff_thresh(bs, increase_denoising)) {
- avg[c] = mc_avg[c];
- total_adj += diff;
- } else {
- switch (absdiff) {
- case 4: case 5: case 6: case 7:
- adj = adj_val[0];
- break;
- case 8: case 9: case 10: case 11:
- case 12: case 13: case 14: case 15:
- adj = adj_val[1];
- break;
- default:
- adj = adj_val[2];
- }
- if (diff > 0) {
- avg[c] = VPXMIN(UINT8_MAX, sig[c] + adj);
- total_adj += adj;
- } else {
- avg[c] = VPXMAX(0, sig[c] - adj);
- total_adj -= adj;
- }
- }
- }
- sig += sig_stride;
- avg += avg_stride;
- mc_avg += mc_avg_stride;
- }
-
- // If the strong filter did not modify the signal too much, we're all set.
- if (abs(total_adj) <= total_adj_strong_thresh(bs, increase_denoising)) {
- return FILTER_BLOCK;
- }
-
- // Otherwise, we try to dampen the filter if the delta is not too high.
- delta = ((abs(total_adj) - total_adj_strong_thresh(bs, increase_denoising))
- >> num_pels_log2_lookup[bs]) + 1;
-
- if (delta >= delta_thresh(bs, increase_denoising)) {
- return COPY_BLOCK;
- }
-
- mc_avg = mc_avg_start;
- avg = avg_start;
- sig = sig_start;
- for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) {
- for (c = 0; c < (4 << b_width_log2_lookup[bs]); ++c) {
- diff = mc_avg[c] - sig[c];
- adj = abs(diff);
- if (adj > delta) {
- adj = delta;
- }
- if (diff > 0) {
- // A positive diff means we made a positive adjustment above
- // (in the first pass), so now make a negative adjustment to bring the
- // denoised signal down.
- avg[c] = VPXMAX(0, avg[c] - adj);
- total_adj -= adj;
- } else {
- // A negative diff means we made a negative adjustment above
- // (in the first pass), so now make a positive adjustment to bring the
- // denoised signal up.
- avg[c] = VPXMIN(UINT8_MAX, avg[c] + adj);
- total_adj += adj;
- }
- }
- sig += sig_stride;
- avg += avg_stride;
- mc_avg += mc_avg_stride;
- }
-
- // We can use the filter if it has been sufficiently dampened
- if (abs(total_adj) <= total_adj_weak_thresh(bs, increase_denoising)) {
- return FILTER_BLOCK;
- }
- return COPY_BLOCK;
-}
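To make the two thresholds above concrete, a worked example with illustrative numbers: for a 16x16 block, num_pels_log2_lookup[bs] is 8, so with increase_denoising = 0 the strong threshold total_adj_strong_thresh() is (1 << 8) * 2 = 512. If the strong pass ends with total_adj = 700, then delta = ((700 - 512) >> 8) + 1 = 1, which is below delta_thresh() = 4, so the block is re-filtered with every per-pixel adjustment capped at 1 and can still return FILTER_BLOCK if the dampened total falls within total_adj_weak_thresh().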
-
-static uint8_t *block_start(uint8_t *framebuf, int stride,
- int mi_row, int mi_col) {
- return framebuf + (stride * mi_row * MI_SIZE) + (mi_col * MI_SIZE);
-}
-
-static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser,
- MACROBLOCK *mb,
- BLOCK_SIZE bs,
- int increase_denoising,
- int mi_row,
- int mi_col,
- PICK_MODE_CONTEXT *ctx,
- int *motion_magnitude
- ) {
- int mv_col, mv_row;
- int sse_diff = ctx->zeromv_sse - ctx->newmv_sse;
- MV_REFERENCE_FRAME frame;
- MACROBLOCKD *filter_mbd = &mb->e_mbd;
- MB_MODE_INFO *mbmi = &filter_mbd->mi[0]->mbmi;
- MB_MODE_INFO saved_mbmi;
- int i, j;
- struct buf_2d saved_dst[MAX_MB_PLANE];
- struct buf_2d saved_pre[MAX_MB_PLANE][2]; // 2 pre buffers
-
- mv_col = ctx->best_sse_mv.as_mv.col;
- mv_row = ctx->best_sse_mv.as_mv.row;
- *motion_magnitude = mv_row * mv_row + mv_col * mv_col;
- frame = ctx->best_reference_frame;
-
- saved_mbmi = *mbmi;
-
- // If the best reference frame uses inter-prediction and there is enough of a
- // difference in sum-squared-error, use it.
- if (frame != INTRA_FRAME &&
- sse_diff > sse_diff_thresh(bs, increase_denoising, *motion_magnitude)) {
- mbmi->ref_frame[0] = ctx->best_reference_frame;
- mbmi->mode = ctx->best_sse_inter_mode;
- mbmi->mv[0] = ctx->best_sse_mv;
- } else {
- // Otherwise, use the zero reference frame.
- frame = ctx->best_zeromv_reference_frame;
-
- mbmi->ref_frame[0] = ctx->best_zeromv_reference_frame;
-#if CONFIG_EXT_INTER
- if (has_second_ref(mbmi))
- mbmi->mode = ZERO_ZEROMV;
- else
-#endif // CONFIG_EXT_INTER
- mbmi->mode = ZEROMV;
- mbmi->mv[0].as_int = 0;
-
-#if CONFIG_EXT_INTER
- if (has_second_ref(mbmi))
- ctx->best_sse_inter_mode = ZERO_ZEROMV;
- else
-#endif // CONFIG_EXT_INTER
- ctx->best_sse_inter_mode = ZEROMV;
- ctx->best_sse_mv.as_int = 0;
- ctx->newmv_sse = ctx->zeromv_sse;
- }
-
- if (ctx->newmv_sse > sse_thresh(bs, increase_denoising)) {
- // Restore everything to its original state
- *mbmi = saved_mbmi;
- return COPY_BLOCK;
- }
- if (*motion_magnitude >
- (noise_motion_thresh(bs, increase_denoising) << 3)) {
- // Restore everything to its original state
- *mbmi = saved_mbmi;
- return COPY_BLOCK;
- }
-
- // We will restore these after motion compensation.
- for (i = 0; i < MAX_MB_PLANE; ++i) {
- for (j = 0; j < 2; ++j) {
- saved_pre[i][j] = filter_mbd->plane[i].pre[j];
- }
- saved_dst[i] = filter_mbd->plane[i].dst;
- }
-
- // Set the pointers in the MACROBLOCKD to point to the buffers in the denoiser
- // struct.
- for (j = 0; j < 2; ++j) {
- filter_mbd->plane[0].pre[j].buf =
- block_start(denoiser->running_avg_y[frame].y_buffer,
- denoiser->running_avg_y[frame].y_stride,
- mi_row, mi_col);
- filter_mbd->plane[0].pre[j].stride =
- denoiser->running_avg_y[frame].y_stride;
- filter_mbd->plane[1].pre[j].buf =
- block_start(denoiser->running_avg_y[frame].u_buffer,
- denoiser->running_avg_y[frame].uv_stride,
- mi_row, mi_col);
- filter_mbd->plane[1].pre[j].stride =
- denoiser->running_avg_y[frame].uv_stride;
- filter_mbd->plane[2].pre[j].buf =
- block_start(denoiser->running_avg_y[frame].v_buffer,
- denoiser->running_avg_y[frame].uv_stride,
- mi_row, mi_col);
- filter_mbd->plane[2].pre[j].stride =
- denoiser->running_avg_y[frame].uv_stride;
- }
- filter_mbd->plane[0].dst.buf =
- block_start(denoiser->mc_running_avg_y.y_buffer,
- denoiser->mc_running_avg_y.y_stride,
- mi_row, mi_col);
- filter_mbd->plane[0].dst.stride = denoiser->mc_running_avg_y.y_stride;
- filter_mbd->plane[1].dst.buf =
- block_start(denoiser->mc_running_avg_y.u_buffer,
- denoiser->mc_running_avg_y.uv_stride,
- mi_row, mi_col);
- filter_mbd->plane[1].dst.stride = denoiser->mc_running_avg_y.uv_stride;
- filter_mbd->plane[2].dst.buf =
- block_start(denoiser->mc_running_avg_y.v_buffer,
- denoiser->mc_running_avg_y.uv_stride,
- mi_row, mi_col);
- filter_mbd->plane[2].dst.stride = denoiser->mc_running_avg_y.uv_stride;
-
- vp10_build_inter_predictors_sby(filter_mbd, mv_row, mv_col, bs);
-
- // Restore everything to its original state
- *mbmi = saved_mbmi;
- for (i = 0; i < MAX_MB_PLANE; ++i) {
- for (j = 0; j < 2; ++j) {
- filter_mbd->plane[i].pre[j] = saved_pre[i][j];
- }
- filter_mbd->plane[i].dst = saved_dst[i];
- }
-
- mv_row = ctx->best_sse_mv.as_mv.row;
- mv_col = ctx->best_sse_mv.as_mv.col;
-
- return FILTER_BLOCK;
-}
-
-void vp10_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
- int mi_row, int mi_col, BLOCK_SIZE bs,
- PICK_MODE_CONTEXT *ctx) {
- int motion_magnitude = 0;
- VP9_DENOISER_DECISION decision = FILTER_BLOCK;
- YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME];
- YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y;
- uint8_t *avg_start = block_start(avg.y_buffer, avg.y_stride, mi_row, mi_col);
- uint8_t *mc_avg_start = block_start(mc_avg.y_buffer, mc_avg.y_stride,
- mi_row, mi_col);
- struct buf_2d src = mb->plane[0].src;
-
- decision = perform_motion_compensation(denoiser, mb, bs,
- denoiser->increase_denoising,
- mi_row, mi_col, ctx,
- &motion_magnitude);
-
- if (decision == FILTER_BLOCK) {
- decision = vp10_denoiser_filter(src.buf, src.stride,
- mc_avg_start, mc_avg.y_stride,
- avg_start, avg.y_stride,
- 0, bs, motion_magnitude);
- }
-
- if (decision == FILTER_BLOCK) {
- vpx_convolve_copy(avg_start, avg.y_stride, src.buf, src.stride,
- NULL, 0, NULL, 0,
- num_4x4_blocks_wide_lookup[bs] << 2,
- num_4x4_blocks_high_lookup[bs] << 2);
- } else { // COPY_BLOCK
- vpx_convolve_copy(src.buf, src.stride, avg_start, avg.y_stride,
- NULL, 0, NULL, 0,
- num_4x4_blocks_wide_lookup[bs] << 2,
- num_4x4_blocks_high_lookup[bs] << 2);
- }
-}
-
-static void copy_frame(YV12_BUFFER_CONFIG dest, const YV12_BUFFER_CONFIG src) {
- int r;
- const uint8_t *srcbuf = src.y_buffer;
- uint8_t *destbuf = dest.y_buffer;
-
- assert(dest.y_width == src.y_width);
- assert(dest.y_height == src.y_height);
-
- for (r = 0; r < dest.y_height; ++r) {
- memcpy(destbuf, srcbuf, dest.y_width);
- destbuf += dest.y_stride;
- srcbuf += src.y_stride;
- }
-}
-
-static void swap_frame_buffer(YV12_BUFFER_CONFIG *dest,
- YV12_BUFFER_CONFIG *src) {
- uint8_t *tmp_buf = dest->y_buffer;
- assert(dest->y_width == src->y_width);
- assert(dest->y_height == src->y_height);
- dest->y_buffer = src->y_buffer;
- src->y_buffer = tmp_buf;
-}
-
-void vp10_denoiser_update_frame_info(VP9_DENOISER *denoiser,
- YV12_BUFFER_CONFIG src,
- FRAME_TYPE frame_type,
- int refresh_last_frame,
-#if CONFIG_EXT_REFS
- int refresh_bwd_ref_frame,
-#endif // CONFIG_EXT_REFS
- int refresh_alt_ref_frame,
- int refresh_golden_frame) {
- if (frame_type == KEY_FRAME) {
- int i;
- // Start at 1 so as not to overwrite the INTRA_FRAME
- for (i = 1; i < MAX_REF_FRAMES; ++i)
- copy_frame(denoiser->running_avg_y[i], src);
- return;
- }
-
- /* For non key frames */
- if (refresh_alt_ref_frame) {
- swap_frame_buffer(&denoiser->running_avg_y[ALTREF_FRAME],
- &denoiser->running_avg_y[INTRA_FRAME]);
- }
- if (refresh_golden_frame) {
- swap_frame_buffer(&denoiser->running_avg_y[GOLDEN_FRAME],
- &denoiser->running_avg_y[INTRA_FRAME]);
- }
- // TODO(zoeliu): Explore whether the case of show_existing_frame == 1 should
- // be handled differently.
- if (refresh_last_frame) {
- swap_frame_buffer(&denoiser->running_avg_y[LAST_FRAME],
- &denoiser->running_avg_y[INTRA_FRAME]);
- }
-#if CONFIG_EXT_REFS
- if (refresh_bwd_ref_frame) {
- swap_frame_buffer(&denoiser->running_avg_y[BWDREF_FRAME],
- &denoiser->running_avg_y[INTRA_FRAME]);
- }
-#endif // CONFIG_EXT_REFS
-}
-
-void vp10_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx) {
- ctx->zeromv_sse = UINT_MAX;
- ctx->newmv_sse = UINT_MAX;
-}
-
-void vp10_denoiser_update_frame_stats(MB_MODE_INFO *mbmi, unsigned int sse,
- PREDICTION_MODE mode,
- PICK_MODE_CONTEXT *ctx) {
- // TODO(tkopp): Use both MVs if possible
- if (mbmi->mv[0].as_int == 0 && sse < ctx->zeromv_sse) {
- ctx->zeromv_sse = sse;
- ctx->best_zeromv_reference_frame = mbmi->ref_frame[0];
- }
-
- if (mbmi->mv[0].as_int != 0 && sse < ctx->newmv_sse) {
- ctx->newmv_sse = sse;
- ctx->best_sse_inter_mode = mode;
- ctx->best_sse_mv = mbmi->mv[0];
- ctx->best_reference_frame = mbmi->ref_frame[0];
- }
-}
-
-int vp10_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height,
- int ssx, int ssy,
-#if CONFIG_VP9_HIGHBITDEPTH
- int use_highbitdepth,
-#endif
- int border) {
- int i, fail;
- const int legacy_byte_alignment = 0;
- assert(denoiser != NULL);
-
- for (i = 0; i < MAX_REF_FRAMES; ++i) {
- fail = vpx_alloc_frame_buffer(&denoiser->running_avg_y[i], width, height,
- ssx, ssy,
-#if CONFIG_VP9_HIGHBITDEPTH
- use_highbitdepth,
-#endif
- border, legacy_byte_alignment);
- if (fail) {
- vp10_denoiser_free(denoiser);
- return 1;
- }
-#ifdef OUTPUT_YUV_DENOISED
- make_grayscale(&denoiser->running_avg_y[i]);
-#endif
- }
-
- fail = vpx_alloc_frame_buffer(&denoiser->mc_running_avg_y, width, height,
- ssx, ssy,
-#if CONFIG_VP9_HIGHBITDEPTH
- use_highbitdepth,
-#endif
- border, legacy_byte_alignment);
- if (fail) {
- vp10_denoiser_free(denoiser);
- return 1;
- }
-#ifdef OUTPUT_YUV_DENOISED
- make_grayscale(&denoiser->running_avg_y[i]);
-#endif
- denoiser->increase_denoising = 0;
- denoiser->frame_buffer_initialized = 1;
-
- return 0;
-}
-
-void vp10_denoiser_free(VP9_DENOISER *denoiser) {
- int i;
- if (denoiser == NULL) {
- return;
- }
- denoiser->frame_buffer_initialized = 0;
- for (i = 0; i < MAX_REF_FRAMES; ++i) {
- vpx_free_frame_buffer(&denoiser->running_avg_y[i]);
- }
- vpx_free_frame_buffer(&denoiser->mc_running_avg_y);
-}
-
-#ifdef OUTPUT_YUV_DENOISED
-static void make_grayscale(YV12_BUFFER_CONFIG *yuv) {
- int r, c;
- uint8_t *u = yuv->u_buffer;
- uint8_t *v = yuv->v_buffer;
-
- for (r = 0; r < yuv->uv_height; ++r) {
- for (c = 0; c < yuv->uv_width; ++c) {
- u[c] = UINT8_MAX / 2;
- v[c] = UINT8_MAX / 2;
- }
- u += yuv->uv_stride;
- v += yuv->uv_stride;
- }
-}
-#endif
+++ /dev/null
-/*
- * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP9_ENCODER_DENOISER_H_
-#define VP9_ENCODER_DENOISER_H_
-
-#include "vp10/encoder/block.h"
-#include "vpx_scale/yv12config.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define MOTION_MAGNITUDE_THRESHOLD (8 * 3)
-
-typedef enum vp10_denoiser_decision {
- COPY_BLOCK,
- FILTER_BLOCK
-} VP9_DENOISER_DECISION;
-
-typedef struct vp10_denoiser {
- YV12_BUFFER_CONFIG running_avg_y[MAX_REF_FRAMES];
- YV12_BUFFER_CONFIG mc_running_avg_y;
- int increase_denoising;
- int frame_buffer_initialized;
-} VP9_DENOISER;
-
-void vp10_denoiser_update_frame_info(VP9_DENOISER *denoiser,
- YV12_BUFFER_CONFIG src,
- FRAME_TYPE frame_type,
- int refresh_last_frame,
-#if CONFIG_EXT_REFS
- int refresh_bwd_ref_frame,
-#endif // CONFIG_EXT_REFS
- int refresh_alt_ref_frame,
- int refresh_golden_frame);
-
-void vp10_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
- int mi_row, int mi_col, BLOCK_SIZE bs,
- PICK_MODE_CONTEXT *ctx);
-
-void vp10_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx);
-
-void vp10_denoiser_update_frame_stats(MB_MODE_INFO *mbmi,
- unsigned int sse, PREDICTION_MODE mode,
- PICK_MODE_CONTEXT *ctx);
-
-int vp10_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height,
- int ssx, int ssy,
-#if CONFIG_VP9_HIGHBITDEPTH
- int use_highbitdepth,
-#endif
- int border);
-
-#if CONFIG_VP9_TEMPORAL_DENOISING
-int total_adj_strong_thresh(BLOCK_SIZE bs, int increase_denoising);
-#endif
-
-void vp10_denoiser_free(VP9_DENOISER *denoiser);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // VP9_ENCODER_DENOISER_H_
#include "vp10/common/alloccommon.h"
#include "vp10/common/filter.h"
#include "vp10/common/idct.h"
-#if CONFIG_VP9_POSTPROC
-#include "vp10/common/postproc.h"
-#endif
#include "vp10/common/reconinter.h"
#include "vp10/common/reconintra.h"
#include "vp10/common/tile_common.h"
vpx_free_frame_buffer(&cpi->upsampled_ref_bufs[i].buf);
vp10_free_ref_frame_buffers(cm->buffer_pool);
-#if CONFIG_VP9_POSTPROC
- vp10_free_postproc_buffers(cm);
-#endif // CONFIG_VP9_POSTPROC
#if CONFIG_LOOP_RESTORATION
vp10_free_restoration_buffers(cm);
#endif // CONFIG_LOOP_RESTORATION
cpi->td.mb.nmvsadcost_hp[1] = &cpi->nmvsadcosts_hp[1][MV_MAX];
cal_nmvsadcosts_hp(cpi->td.mb.nmvsadcost_hp);
-#if CONFIG_VP9_TEMPORAL_DENOISING
-#ifdef OUTPUT_YUV_DENOISED
- yuv_denoised_file = fopen("denoised.yuv", "ab");
-#endif
-#endif
#ifdef OUTPUT_YUV_SKINMAP
yuv_skinmap_file = fopen("skinmap.yuv", "ab");
#endif
#endif
}
-#if CONFIG_VP9_TEMPORAL_DENOISING
- vp10_denoiser_free(&(cpi->denoiser));
-#endif
-
for (t = 0; t < cpi->num_workers; ++t) {
VPxWorker *const worker = &cpi->workers[t];
EncWorkerData *const thread_data = &cpi->tile_thr_data[t];
vp10_remove_common(cm);
vp10_free_ref_frame_buffers(cm->buffer_pool);
-#if CONFIG_VP9_POSTPROC
- vp10_free_postproc_buffers(cm);
-#endif
vpx_free(cpi);
-#if CONFIG_VP9_TEMPORAL_DENOISING
-#ifdef OUTPUT_YUV_DENOISED
- fclose(yuv_denoised_file);
-#endif
-#endif
#ifdef OUTPUT_YUV_SKINMAP
fclose(yuv_skinmap_file);
#endif
// Dump out all reference frame images.
dump_ref_frame_images(cpi);
#endif // DUMP_REF_FRAME_IMAGES
-
-#if CONFIG_VP9_TEMPORAL_DENOISING
- if (cpi->oxcf.noise_sensitivity > 0) {
- vp10_denoiser_update_frame_info(&cpi->denoiser,
- *cpi->Source,
- cpi->common.frame_type,
- cpi->refresh_last_frame,
-#if CONFIG_EXT_REFS
- cpi->refresh_bwd_ref_frame,
-#endif // CONFIG_EXT_REFS
- cpi->refresh_alt_ref_frame,
- cpi->refresh_golden_frame);
- }
-#endif
}
static void loopfilter_frame(VP10_COMP *cpi, VP10_COMMON *cm) {
// lagged coding, and if the relevant speed feature flag is set.
if (oxcf->pass == 2 && cpi->sf.static_segmentation)
configure_static_seg_features(cpi);
-
-#if CONFIG_VP9_POSTPROC
- if (oxcf->noise_sensitivity > 0) {
- int l = 0;
- switch (oxcf->noise_sensitivity) {
- case 1:
- l = 20;
- break;
- case 2:
- l = 40;
- break;
- case 3:
- l = 60;
- break;
- case 4:
- case 5:
- l = 100;
- break;
- case 6:
- l = 150;
- break;
- }
- vp10_denoise(cpi->Source, cpi->Source, l);
- }
-#endif // CONFIG_VP9_POSTPROC
}
static void init_motion_estimation(VP10_COMP *cpi) {
encode_with_recode_loop(cpi, size, dest);
}
-#if CONFIG_VP9_TEMPORAL_DENOISING
-#ifdef OUTPUT_YUV_DENOISED
- if (oxcf->noise_sensitivity > 0) {
- vp10_write_yuv_frame_420(&cpi->denoiser.running_avg_y[INTRA_FRAME],
- yuv_denoised_file);
- }
-#endif // OUTPUT_YUV_DENOISED
-#endif // CONFIG_VP9_TEMPORAL_DENOISING
-
#ifdef OUTPUT_YUV_SKINMAP
if (cpi->common.current_video_frame > 1) {
vp10_compute_skin_map(cpi, yuv_skinmap_file);
}
}
-#if CONFIG_VP9_TEMPORAL_DENOISING
-static void setup_denoiser_buffer(VP10_COMP *cpi) {
- VP10_COMMON *const cm = &cpi->common;
- if (cpi->oxcf.noise_sensitivity > 0 &&
- !cpi->denoiser.frame_buffer_initialized) {
- if (vp10_denoiser_alloc(&cpi->denoiser, cm->width, cm->height,
- cm->subsampling_x, cm->subsampling_y,
-#if CONFIG_VP9_HIGHBITDEPTH
- cm->use_highbitdepth,
-#endif
- VPX_ENC_BORDER_IN_PIXELS))
- vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
- "Failed to allocate denoiser");
- }
-}
-#endif
-
int vp10_receive_raw_frame(VP10_COMP *cpi, unsigned int frame_flags,
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
int64_t end_time) {
check_initial_width(cpi, subsampling_x, subsampling_y);
#endif // CONFIG_VP9_HIGHBITDEPTH
-#if CONFIG_VP9_TEMPORAL_DENOISING
- setup_denoiser_buffer(cpi);
-#endif
vpx_usec_timer_start(&timer);
if (vp10_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
return 0;
}
-int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest,
- vp10_ppflags_t *flags) {
+int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest) {
VP10_COMMON *cm = &cpi->common;
-#if !CONFIG_VP9_POSTPROC
- (void)flags;
-#endif
-
if (!cm->show_frame) {
return -1;
} else {
int ret;
-#if CONFIG_VP9_POSTPROC
- ret = vp10_post_proc_frame(cm, dest, flags);
-#else
if (cm->frame_to_show) {
*dest = *cm->frame_to_show;
dest->y_width = cm->width;
} else {
ret = -1;
}
-#endif // !CONFIG_VP9_POSTPROC
vpx_clear_system_state();
return ret;
}
check_initial_width(cpi, 1, 1);
#endif // CONFIG_VP9_HIGHBITDEPTH
-#if CONFIG_VP9_TEMPORAL_DENOISING
- setup_denoiser_buffer(cpi);
-#endif
-
if (width) {
cm->width = width;
if (cm->width > cpi->initial_width) {
#include "vpx/vp8cx.h"
#include "vp10/common/alloccommon.h"
-#include "vp10/common/ppflags.h"
#include "vp10/common/entropymode.h"
#include "vp10/common/thread_common.h"
#include "vp10/common/onyxc_int.h"
-
#include "vp10/encoder/aq_cyclicrefresh.h"
#if CONFIG_ANS
#include "vp10/encoder/buf_ans.h"
#include "vp10/encoder/tokenize.h"
#include "vp10/encoder/variance_tree.h"
-#if CONFIG_VP9_TEMPORAL_DENOISING
-#include "vp10/encoder/denoiser.h"
-#endif
-
#if CONFIG_INTERNAL_STATS
#include "vpx_dsp/ssim.h"
#endif
TileBufferEnc tile_buffers[MAX_TILE_ROWS][MAX_TILE_COLS];
-#if CONFIG_VP9_TEMPORAL_DENOISING
- VP9_DENOISER denoiser;
-#endif
-
int resize_pending;
int resize_state;
int resize_scale_num;
size_t *size, uint8_t *dest,
int64_t *time_stamp, int64_t *time_end, int flush);
-int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest,
- vp10_ppflags_t *flags);
+int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest);
int vp10_get_last_show_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *frame);
+++ /dev/null
-/*
- * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <emmintrin.h>
-
-#include "./vpx_config.h"
-#include "./vp10_rtcd.h"
-
-#include "vpx_ports/emmintrin_compat.h"
-#include "vpx/vpx_integer.h"
-#include "vp10/common/reconinter.h"
-#include "vp10/encoder/context_tree.h"
-#include "vp10/encoder/denoiser.h"
-#include "vpx_mem/vpx_mem.h"
-
-// Compute the sum of all pixel differences of this MB.
-static INLINE int sum_diff_16x1(__m128i acc_diff) {
- const __m128i k_1 = _mm_set1_epi16(1);
- const __m128i acc_diff_lo =
- _mm_srai_epi16(_mm_unpacklo_epi8(acc_diff, acc_diff), 8);
- const __m128i acc_diff_hi =
- _mm_srai_epi16(_mm_unpackhi_epi8(acc_diff, acc_diff), 8);
- const __m128i acc_diff_16 = _mm_add_epi16(acc_diff_lo, acc_diff_hi);
- const __m128i hg_fe_dc_ba = _mm_madd_epi16(acc_diff_16, k_1);
- const __m128i hgfe_dcba =
- _mm_add_epi32(hg_fe_dc_ba, _mm_srli_si128(hg_fe_dc_ba, 8));
- const __m128i hgfedcba =
- _mm_add_epi32(hgfe_dcba, _mm_srli_si128(hgfe_dcba, 4));
- return _mm_cvtsi128_si32(hgfedcba);
-}
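A scalar reference for the reduction above, assuming acc_diff holds sixteen signed 8-bit running totals (the unpack/srai pair sign-extends them, the multiply-add against 1 sums pairs, and the two folds add the lanes); the helper name is illustrative, not from this tree:

static int sum_diff_16x1_ref(const signed char acc_diff[16]) {
  int sum = 0, i;
  for (i = 0; i < 16; ++i) sum += acc_diff[i];
  return sum;
}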
-
-// Denoise a 16x1 vector.
-static INLINE __m128i vp10_denoiser_16x1_sse2(const uint8_t *sig,
- const uint8_t *mc_running_avg_y,
- uint8_t *running_avg_y,
- const __m128i *k_0,
- const __m128i *k_4,
- const __m128i *k_8,
- const __m128i *k_16,
- const __m128i *l3,
- const __m128i *l32,
- const __m128i *l21,
- __m128i acc_diff) {
- // Calculate differences
- const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0]));
- const __m128i v_mc_running_avg_y =
- _mm_loadu_si128((const __m128i *)(&mc_running_avg_y[0]));
- __m128i v_running_avg_y;
- const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig);
- const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y);
- // Obtain the sign. FF if diff is negative.
- const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, *k_0);
- // Clamp absolute difference to 16 to be used to get mask. Doing this
- // allows us to use _mm_cmpgt_epi8, which operates on signed byte.
- const __m128i clamped_absdiff =
- _mm_min_epu8(_mm_or_si128(pdiff, ndiff), *k_16);
- // Get masks for l2 l1 and l0 adjustments.
- const __m128i mask2 = _mm_cmpgt_epi8(*k_16, clamped_absdiff);
- const __m128i mask1 = _mm_cmpgt_epi8(*k_8, clamped_absdiff);
- const __m128i mask0 = _mm_cmpgt_epi8(*k_4, clamped_absdiff);
- // Get adjustments for l2, l1, and l0.
- __m128i adj2 = _mm_and_si128(mask2, *l32);
- const __m128i adj1 = _mm_and_si128(mask1, *l21);
- const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff);
- __m128i adj, padj, nadj;
-
- // Combine the adjustments and get absolute adjustments.
- adj2 = _mm_add_epi8(adj2, adj1);
- adj = _mm_sub_epi8(*l3, adj2);
- adj = _mm_andnot_si128(mask0, adj);
- adj = _mm_or_si128(adj, adj0);
-
- // Restore the sign and get positive and negative adjustments.
- padj = _mm_andnot_si128(diff_sign, adj);
- nadj = _mm_and_si128(diff_sign, adj);
-
- // Calculate filtered value.
- v_running_avg_y = _mm_adds_epu8(v_sig, padj);
- v_running_avg_y = _mm_subs_epu8(v_running_avg_y, nadj);
- _mm_storeu_si128((__m128i *)running_avg_y, v_running_avg_y);
-
- // Adjustments <=7, and each element in acc_diff can fit in signed
- // char.
- acc_diff = _mm_adds_epi8(acc_diff, padj);
- acc_diff = _mm_subs_epi8(acc_diff, nadj);
- return acc_diff;
-}
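A scalar sketch of the adjustment the k_4/k_8/k_16 mask arithmetic above selects per pixel, assuming motion_small stands for motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD; the sign of the difference then decides whether this value is added to or subtracted from the source with byte saturation. The helper name and parameters are illustrative, not from this tree:

static int denoiser_level_adj(int absdiff, int increase_denoising,
                              int motion_small) {
  const int shift_inc = (increase_denoising && motion_small) ? 1 : 0;
  const int l3 = motion_small ? 7 + shift_inc : 6;  /* strongest adjustment */
  if (absdiff < 4 + shift_inc) return absdiff;      /* level 0: take the full diff */
  if (absdiff < 8) return l3 - 3;                   /* level 1 */
  if (absdiff < 16) return l3 - 2;                  /* level 2 */
  return l3;                                        /* level 3 */
}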
-
-// Denoise a 16x1 vector with a weaker filter.
-static INLINE __m128i vp10_denoiser_adj_16x1_sse2(
- const uint8_t *sig, const uint8_t *mc_running_avg_y,
- uint8_t *running_avg_y, const __m128i k_0,
- const __m128i k_delta, __m128i acc_diff) {
- __m128i v_running_avg_y = _mm_loadu_si128((__m128i *)(&running_avg_y[0]));
- // Calculate differences.
- const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0]));
- const __m128i v_mc_running_avg_y =
- _mm_loadu_si128((const __m128i *)(&mc_running_avg_y[0]));
- const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig);
- const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y);
- // Obtain the sign. FF if diff is negative.
- const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0);
- // Clamp absolute difference to delta to get the adjustment.
- const __m128i adj =
- _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_delta);
- // Restore the sign and get positive and negative adjustments.
- __m128i padj, nadj;
- padj = _mm_andnot_si128(diff_sign, adj);
- nadj = _mm_and_si128(diff_sign, adj);
- // Calculate filtered value.
- v_running_avg_y = _mm_subs_epu8(v_running_avg_y, padj);
- v_running_avg_y = _mm_adds_epu8(v_running_avg_y, nadj);
- _mm_storeu_si128((__m128i *)running_avg_y, v_running_avg_y);
-
- // Accumulate the adjustments.
- acc_diff = _mm_subs_epi8(acc_diff, padj);
- acc_diff = _mm_adds_epi8(acc_diff, nadj);
- return acc_diff;
-}
-
-// Denoiser for 4xM and 8xM blocks.
-static int vp10_denoiser_NxM_sse2_small(
- const uint8_t *sig, int sig_stride, const uint8_t *mc_running_avg_y,
- int mc_avg_y_stride, uint8_t *running_avg_y, int avg_y_stride,
- int increase_denoising, BLOCK_SIZE bs, int motion_magnitude, int width) {
- int sum_diff_thresh, r, sum_diff = 0;
- const int shift_inc = (increase_denoising &&
- motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ?
- 1 : 0;
- uint8_t sig_buffer[8][16], mc_running_buffer[8][16], running_buffer[8][16];
- __m128i acc_diff = _mm_setzero_si128();
- const __m128i k_0 = _mm_setzero_si128();
- const __m128i k_4 = _mm_set1_epi8(4 + shift_inc);
- const __m128i k_8 = _mm_set1_epi8(8);
- const __m128i k_16 = _mm_set1_epi8(16);
- // Modify each level's adjustment according to motion_magnitude.
- const __m128i l3 = _mm_set1_epi8(
- (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 + shift_inc : 6);
- // Difference between level 3 and level 2 is 2.
- const __m128i l32 = _mm_set1_epi8(2);
- // Difference between level 2 and level 1 is 1.
- const __m128i l21 = _mm_set1_epi8(1);
- const uint8_t shift = (width == 4) ? 2 : 1;
-
- for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> shift); ++r) {
- memcpy(sig_buffer[r], sig, width);
- memcpy(sig_buffer[r] + width, sig + sig_stride, width);
- memcpy(mc_running_buffer[r], mc_running_avg_y, width);
- memcpy(mc_running_buffer[r] + width,
- mc_running_avg_y + mc_avg_y_stride, width);
- memcpy(running_buffer[r], running_avg_y, width);
- memcpy(running_buffer[r] + width, running_avg_y + avg_y_stride, width);
- if (width == 4) {
- memcpy(sig_buffer[r] + width * 2, sig + sig_stride * 2, width);
- memcpy(sig_buffer[r] + width * 3, sig + sig_stride * 3, width);
- memcpy(mc_running_buffer[r] + width * 2,
- mc_running_avg_y + mc_avg_y_stride * 2, width);
- memcpy(mc_running_buffer[r] + width * 3,
- mc_running_avg_y + mc_avg_y_stride * 3, width);
- memcpy(running_buffer[r] + width * 2,
- running_avg_y + avg_y_stride * 2, width);
- memcpy(running_buffer[r] + width * 3,
- running_avg_y + avg_y_stride * 3, width);
- }
- acc_diff = vp10_denoiser_16x1_sse2(sig_buffer[r],
- mc_running_buffer[r],
- running_buffer[r],
- &k_0, &k_4, &k_8, &k_16,
- &l3, &l32, &l21, acc_diff);
- memcpy(running_avg_y, running_buffer[r], width);
- memcpy(running_avg_y + avg_y_stride, running_buffer[r] + width, width);
- if (width == 4) {
- memcpy(running_avg_y + avg_y_stride * 2,
- running_buffer[r] + width * 2, width);
- memcpy(running_avg_y + avg_y_stride * 3,
- running_buffer[r] + width * 3, width);
- }
- // Update pointers for next iteration.
- sig += (sig_stride << shift);
- mc_running_avg_y += (mc_avg_y_stride << shift);
- running_avg_y += (avg_y_stride << shift);
- }
-
- {
- sum_diff = sum_diff_16x1(acc_diff);
- sum_diff_thresh = total_adj_strong_thresh(bs, increase_denoising);
- if (abs(sum_diff) > sum_diff_thresh) {
- // Before returning to copy the block (i.e., apply no denoising),
- // check if we can still apply some (weaker) temporal filtering to
- // this block, that would otherwise not be denoised at all. Simplest
- // is to apply an additional adjustment to running_avg_y to bring it
- // closer to sig. The adjustment is capped by a maximum delta, and
- // chosen such that in most cases the resulting sum_diff will be
- // within the acceptable range given by sum_diff_thresh.
-
- // The delta is set by the excess of absolute pixel diff over the
- // threshold.
- const int delta = ((abs(sum_diff) - sum_diff_thresh) >>
- num_pels_log2_lookup[bs]) + 1;
- // Only apply the adjustment for max delta up to 3.
- if (delta < 4) {
- const __m128i k_delta = _mm_set1_epi8(delta);
- running_avg_y -= avg_y_stride * (4 << b_height_log2_lookup[bs]);
- for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> shift); ++r) {
- acc_diff = vp10_denoiser_adj_16x1_sse2(
- sig_buffer[r], mc_running_buffer[r], running_buffer[r],
- k_0, k_delta, acc_diff);
- memcpy(running_avg_y, running_buffer[r], width);
- memcpy(running_avg_y + avg_y_stride,
- running_buffer[r] + width, width);
- if (width == 4) {
- memcpy(running_avg_y + avg_y_stride * 2,
- running_buffer[r] + width * 2, width);
- memcpy(running_avg_y + avg_y_stride * 3,
- running_buffer[r] + width * 3, width);
- }
- // Update pointers for next iteration.
- running_avg_y += (avg_y_stride << shift);
- }
- sum_diff = sum_diff_16x1(acc_diff);
- if (abs(sum_diff) > sum_diff_thresh) {
- return COPY_BLOCK;
- }
- } else {
- return COPY_BLOCK;
- }
- }
- }
- return FILTER_BLOCK;
-}
-
-// Denoiser for 16xM, 32xM and 64xM blocks
-static int vp10_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
- const uint8_t *mc_running_avg_y,
- int mc_avg_y_stride,
- uint8_t *running_avg_y,
- int avg_y_stride,
- int increase_denoising, BLOCK_SIZE bs,
- int motion_magnitude) {
- int sum_diff_thresh, r, c, sum_diff = 0;
- const int shift_inc = (increase_denoising &&
- motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ?
- 1 : 0;
- __m128i acc_diff[4][4];
- const __m128i k_0 = _mm_setzero_si128();
- const __m128i k_4 = _mm_set1_epi8(4 + shift_inc);
- const __m128i k_8 = _mm_set1_epi8(8);
- const __m128i k_16 = _mm_set1_epi8(16);
- // Modify each level's adjustment according to motion_magnitude.
- const __m128i l3 = _mm_set1_epi8(
- (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 + shift_inc : 6);
- // Difference between level 3 and level 2 is 2.
- const __m128i l32 = _mm_set1_epi8(2);
- // Difference between level 2 and level 1 is 1.
- const __m128i l21 = _mm_set1_epi8(1);
-
- for (c = 0; c < 4; ++c) {
- for (r = 0; r < 4; ++r) {
- acc_diff[c][r] = _mm_setzero_si128();
- }
- }
-
- for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) {
- for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) {
- acc_diff[c>>4][r>>4] = vp10_denoiser_16x1_sse2(
- sig, mc_running_avg_y, running_avg_y, &k_0, &k_4,
- &k_8, &k_16, &l3, &l32, &l21, acc_diff[c>>4][r>>4]);
- // Update pointers for next iteration.
- sig += 16;
- mc_running_avg_y += 16;
- running_avg_y += 16;
- }
-
- if ((r + 1) % 16 == 0 || (bs == BLOCK_16X8 && r == 7)) {
- for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) {
- sum_diff += sum_diff_16x1(acc_diff[c>>4][r>>4]);
- }
- }
-
- // Update pointers for next iteration.
- sig = sig - 16 * ((4 << b_width_log2_lookup[bs]) >> 4) + sig_stride;
- mc_running_avg_y = mc_running_avg_y -
- 16 * ((4 << b_width_log2_lookup[bs]) >> 4) +
- mc_avg_y_stride;
- running_avg_y = running_avg_y -
- 16 * ((4 << b_width_log2_lookup[bs]) >> 4) +
- avg_y_stride;
- }
-
- {
- sum_diff_thresh = total_adj_strong_thresh(bs, increase_denoising);
- if (abs(sum_diff) > sum_diff_thresh) {
- const int delta = ((abs(sum_diff) - sum_diff_thresh) >>
- num_pels_log2_lookup[bs]) + 1;
-
- // Only apply the adjustment for max delta up to 3.
- if (delta < 4) {
- const __m128i k_delta = _mm_set1_epi8(delta);
- sig -= sig_stride * (4 << b_height_log2_lookup[bs]);
- mc_running_avg_y -= mc_avg_y_stride * (4 << b_height_log2_lookup[bs]);
- running_avg_y -= avg_y_stride * (4 << b_height_log2_lookup[bs]);
- sum_diff = 0;
- for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) {
- for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) {
- acc_diff[c>>4][r>>4] = vp10_denoiser_adj_16x1_sse2(
- sig, mc_running_avg_y, running_avg_y, k_0,
- k_delta, acc_diff[c>>4][r>>4]);
- // Update pointers for next iteration.
- sig += 16;
- mc_running_avg_y += 16;
- running_avg_y += 16;
- }
-
- if ((r + 1) % 16 == 0 || (bs == BLOCK_16X8 && r == 7)) {
- for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) {
- sum_diff += sum_diff_16x1(acc_diff[c>>4][r>>4]);
- }
- }
- sig = sig - 16 * ((4 << b_width_log2_lookup[bs]) >> 4) + sig_stride;
- mc_running_avg_y = mc_running_avg_y -
- 16 * ((4 << b_width_log2_lookup[bs]) >> 4) +
- mc_avg_y_stride;
- running_avg_y = running_avg_y -
- 16 * ((4 << b_width_log2_lookup[bs]) >> 4) +
- avg_y_stride;
- }
- if (abs(sum_diff) > sum_diff_thresh) {
- return COPY_BLOCK;
- }
- } else {
- return COPY_BLOCK;
- }
- }
- }
- return FILTER_BLOCK;
-}
-
-int vp10_denoiser_filter_sse2(const uint8_t *sig, int sig_stride,
- const uint8_t *mc_avg,
- int mc_avg_stride,
- uint8_t *avg, int avg_stride,
- int increase_denoising,
- BLOCK_SIZE bs,
- int motion_magnitude) {
- if (bs == BLOCK_4X4 || bs == BLOCK_4X8) {
- return vp10_denoiser_NxM_sse2_small(sig, sig_stride,
- mc_avg, mc_avg_stride,
- avg, avg_stride,
- increase_denoising,
- bs, motion_magnitude, 4);
- } else if (bs == BLOCK_8X4 || bs == BLOCK_8X8 || bs == BLOCK_8X16) {
- return vp10_denoiser_NxM_sse2_small(sig, sig_stride,
- mc_avg, mc_avg_stride,
- avg, avg_stride,
- increase_denoising,
- bs, motion_magnitude, 8);
- } else if (bs < BLOCK_SIZES) {
- return vp10_denoiser_NxM_sse2_big(sig, sig_stride,
- mc_avg, mc_avg_stride,
- avg, avg_stride,
- increase_denoising,
- bs, motion_magnitude);
- } else {
- return COPY_BLOCK;
- }
-}
VP10_COMMON_SRCS-yes += vp10_common.mk
VP10_COMMON_SRCS-yes += vp10_iface_common.h
VP10_COMMON_SRCS-yes += common/ans.h
-VP10_COMMON_SRCS-yes += common/ppflags.h
VP10_COMMON_SRCS-yes += common/alloccommon.c
VP10_COMMON_SRCS-yes += common/blockd.c
VP10_COMMON_SRCS-yes += common/debugmodes.c
VP10_COMMON_SRCS-$(CONFIG_ANS) += common/ans.h
VP10_COMMON_SRCS-$(CONFIG_ANS) += common/divide.h
VP10_COMMON_SRCS-$(CONFIG_ANS) += common/divide.c
-
-VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.h
-VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.c
VP10_COMMON_SRCS-$(CONFIG_LOOP_RESTORATION) += common/restoration.h
VP10_COMMON_SRCS-$(CONFIG_LOOP_RESTORATION) += common/restoration.c
-VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/mfqe.h
-VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/mfqe.c
-ifeq ($(CONFIG_VP9_POSTPROC),yes)
-VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/mfqe_sse2.asm
-VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm
-endif
ifeq (yes,$(filter yes,$(CONFIG_GLOBAL_MOTION) $(CONFIG_WARPED_MOTION)))
VP10_COMMON_SRCS-yes += common/warped_motion.h
VP10_COMMON_SRCS-yes += common/warped_motion.c
VP10_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/idct8x8_msa.c
VP10_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/idct16x16_msa.c
-ifeq ($(CONFIG_VP9_POSTPROC),yes)
-VP10_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/mfqe_msa.c
-endif
-
VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_intrin_sse2.c
VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_fwd_txfm_sse2.c
VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_fwd_dct32x32_impl_sse2.h
static vpx_codec_err_t ctrl_set_previewpp(vpx_codec_alg_priv_t *ctx,
va_list args) {
-#if CONFIG_VP9_POSTPROC
- vp8_postproc_cfg_t *config = va_arg(args, vp8_postproc_cfg_t *);
- if (config != NULL) {
- ctx->preview_ppcfg = *config;
- return VPX_CODEC_OK;
- } else {
- return VPX_CODEC_INVALID_PARAM;
- }
-#else
(void)ctx;
(void)args;
return VPX_CODEC_INCAPABLE;
-#endif
}
static vpx_image_t *encoder_get_preview(vpx_codec_alg_priv_t *ctx) {
YV12_BUFFER_CONFIG sd;
- vp10_ppflags_t flags;
- vp10_zero(flags);
-
- if (ctx->preview_ppcfg.post_proc_flag) {
- flags.post_proc_flag = ctx->preview_ppcfg.post_proc_flag;
- flags.deblocking_level = ctx->preview_ppcfg.deblocking_level;
- flags.noise_level = ctx->preview_ppcfg.noise_level;
- }
- if (vp10_get_preview_raw_frame(ctx->cpi, &sd, &flags) == 0) {
+ if (vp10_get_preview_raw_frame(ctx->cpi, &sd) == 0) {
yuvconfig2image(&ctx->preview_img, &sd, NULL);
return &ctx->preview_img;
} else {
#include "vp10/vp10_iface_common.h"
-#define VP9_CAP_POSTPROC (CONFIG_VP9_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0)
-
typedef vpx_codec_stream_info_t vp10_stream_info_t;
// This limit is due to framebuffer numbers.
(FrameWorkerData *)worker->data1;
vpx_get_worker_interface()->end(worker);
vp10_remove_common(&frame_worker_data->pbi->common);
-#if CONFIG_VP9_POSTPROC
- vp10_free_postproc_buffers(&frame_worker_data->pbi->common);
-#endif
#if CONFIG_LOOP_RESTORATION
vp10_free_restoration_buffers(&frame_worker_data->pbi->common);
#endif // CONFIG_LOOP_RESTORATION
cfg->noise_level = 0;
}
-static void set_ppflags(const vpx_codec_alg_priv_t *ctx,
- vp10_ppflags_t *flags) {
- flags->post_proc_flag =
- ctx->postproc_cfg.post_proc_flag;
-
- flags->deblocking_level = ctx->postproc_cfg.deblocking_level;
- flags->noise_level = ctx->postproc_cfg.noise_level;
-}
-
static int frame_worker_hook(void *arg1, void *arg2) {
FrameWorkerData *const frame_worker_data = (FrameWorkerData *)arg1;
const uint8_t *data = frame_worker_data->data;
static void wait_worker_and_cache_frame(vpx_codec_alg_priv_t *ctx) {
YV12_BUFFER_CONFIG sd;
- vp10_ppflags_t flags = {0, 0, 0};
const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
VPxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id];
FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
check_resync(ctx, frame_worker_data->pbi);
- if (vp10_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) {
+ if (vp10_get_raw_frame(frame_worker_data->pbi, &sd) == 0) {
VP10_COMMON *const cm = &frame_worker_data->pbi->common;
RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
ctx->frame_cache[ctx->frame_cache_write].fb_idx = cm->new_fb_idx;
if (*iter == NULL && ctx->frame_workers != NULL) {
do {
YV12_BUFFER_CONFIG sd;
- vp10_ppflags_t flags = {0, 0, 0};
const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
VPxWorker *const worker =
&ctx->frame_workers[ctx->next_output_worker_id];
(FrameWorkerData *)worker->data1;
ctx->next_output_worker_id =
(ctx->next_output_worker_id + 1) % ctx->num_frame_workers;
- if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)
- set_ppflags(ctx, &flags);
// Wait for the frame from worker thread.
if (winterface->sync(worker)) {
// Check if worker has received any frames.
frame_worker_data->received_frame = 0;
check_resync(ctx, frame_worker_data->pbi);
}
- if (vp10_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) {
+ if (vp10_get_raw_frame(frame_worker_data->pbi, &sd) == 0) {
VP10_COMMON *const cm = &frame_worker_data->pbi->common;
RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
release_last_output_frame(ctx);
static vpx_codec_err_t ctrl_set_postproc(vpx_codec_alg_priv_t *ctx,
va_list args) {
-#if CONFIG_VP9_POSTPROC
- vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *);
-
- if (data) {
- ctx->postproc_cfg_set = 1;
- ctx->postproc_cfg = *((vp8_postproc_cfg_t *)data);
- return VPX_CODEC_OK;
- } else {
- return VPX_CODEC_INVALID_PARAM;
- }
-#else
(void)ctx;
(void)args;
return VPX_CODEC_INCAPABLE;
-#endif
}
static vpx_codec_err_t ctrl_set_dbg_options(vpx_codec_alg_priv_t *ctx,
CODEC_INTERFACE(vpx_codec_vp10_dx) = {
"WebM Project VP10 Decoder" VERSION_STRING,
VPX_CODEC_INTERNAL_ABI_VERSION,
- VPX_CODEC_CAP_DECODER | VP9_CAP_POSTPROC |
+ VPX_CODEC_CAP_DECODER |
VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER, // vpx_codec_caps_t
decoder_init, // vpx_codec_init_fn_t
decoder_destroy, // vpx_codec_destroy_fn_t
VP10_CX_SRCS-yes += encoder/dct.c
VP10_CX_SRCS-yes += encoder/hybrid_fwd_txfm.c
VP10_CX_SRCS-yes += encoder/hybrid_fwd_txfm.h
-VP10_CX_SRCS-$(CONFIG_VP9_TEMPORAL_DENOISING) += encoder/denoiser.c
-VP10_CX_SRCS-$(CONFIG_VP9_TEMPORAL_DENOISING) += encoder/denoiser.h
VP10_CX_SRCS-yes += encoder/encodeframe.c
VP10_CX_SRCS-yes += encoder/encodeframe.h
VP10_CX_SRCS-yes += encoder/encodemb.c
VP10_CX_SRCS-yes += encoder/aq_complexity.h
VP10_CX_SRCS-yes += encoder/skin_detection.c
VP10_CX_SRCS-yes += encoder/skin_detection.h
-ifeq ($(CONFIG_VP9_POSTPROC),yes)
-VP10_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.h
-VP10_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.c
-endif
VP10_CX_SRCS-yes += encoder/temporal_filter.c
VP10_CX_SRCS-yes += encoder/temporal_filter.h
VP10_CX_SRCS-yes += encoder/mbgraph.c
VP10_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp10_highbd_quantize_sse4.c
endif
-ifeq ($(CONFIG_VP9_TEMPORAL_DENOISING),yes)
-VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoiser_sse2.c
-endif
ifeq ($(CONFIG_EXT_INTER),yes)
VP10_CX_SRCS-yes += encoder/wedge_utils.c
VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/wedge_utils_sse2.c