${toggle_vp10} VP10 codec support
${toggle_internal_stats} output of encoder internal stats for debug, if supported (encoders)
${toggle_postproc} postprocessing
- ${toggle_vp9_postproc} vp9 specific postprocessing
${toggle_multithread} multithreaded encoding and decoding
${toggle_spatial_resampling} spatial sampling (scaling) support
${toggle_realtime_only} enable this option while building for real-time encoding
dc_recon
runtime_cpu_detect
postproc
- vp9_postproc
multithread
internal_stats
${CODECS}
dequant_tokens
dc_recon
postproc
- vp9_postproc
multithread
internal_stats
${CODECS}
done
enabled debug_libs && DIST_DIR="${DIST_DIR}-debug"
enabled codec_srcs && DIST_DIR="${DIST_DIR}-src"
- ! enabled postproc && ! enabled vp9_postproc && DIST_DIR="${DIST_DIR}-nopost"
+ ! enabled postproc && DIST_DIR="${DIST_DIR}-nopost"
! enabled multithread && DIST_DIR="${DIST_DIR}-nomt"
! enabled install_docs && DIST_DIR="${DIST_DIR}-nodocs"
DIST_DIR="${DIST_DIR}-${tgt_isa}-${tgt_os}"
enable_feature dc_recon
fi
- if enabled internal_stats; then
- enable_feature vp9_postproc
- fi
-
# Enable the postbuild target if building for visual studio.
case "$tgt_cc" in
vs*) enable_feature msvs
}
}
-void vp10_free_postproc_buffers(VP10_COMMON *cm) {
-#if CONFIG_VP9_POSTPROC
- vpx_free_frame_buffer(&cm->post_proc_buffer);
- vpx_free_frame_buffer(&cm->post_proc_buffer_int);
-#else
- (void)cm;
-#endif
-}
-
void vp10_free_context_buffers(VP10_COMMON *cm) {
cm->free_mi(cm);
free_seg_map(cm);
+++ /dev/null
-/*
- * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "./vpx_config.h"
-#include "./vp10_rtcd.h"
-#include "./vpx_dsp_rtcd.h"
-#include "./vpx_scale_rtcd.h"
-
-#include "vp10/common/onyxc_int.h"
-#include "vp10/common/postproc.h"
-
-// TODO(jackychen): Replace this function with SSE2 code. There is
-// one SSE2 implementation in vp8, so will consider how to share it
-// between vp8 and vp9.
-static void filter_by_weight(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride,
- int block_size, int src_weight) {
- const int dst_weight = (1 << MFQE_PRECISION) - src_weight;
- const int rounding_bit = 1 << (MFQE_PRECISION - 1);
- int r, c;
-
- for (r = 0; r < block_size; r++) {
- for (c = 0; c < block_size; c++) {
- dst[c] = (src[c] * src_weight + dst[c] * dst_weight + rounding_bit)
- >> MFQE_PRECISION;
- }
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-void vp10_filter_by_weight8x8_c(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int src_weight) {
- filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight);
-}
-
-void vp10_filter_by_weight16x16_c(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride,
- int src_weight) {
- filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight);
-}
-
-static void filter_by_weight32x32(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int weight) {
- vp10_filter_by_weight16x16(src, src_stride, dst, dst_stride, weight);
- vp10_filter_by_weight16x16(src + 16, src_stride, dst + 16, dst_stride,
- weight);
- vp10_filter_by_weight16x16(src + src_stride * 16, src_stride,
- dst + dst_stride * 16, dst_stride, weight);
- vp10_filter_by_weight16x16(src + src_stride * 16 + 16, src_stride,
- dst + dst_stride * 16 + 16, dst_stride, weight);
-}
-
-static void filter_by_weight64x64(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride, int weight) {
- filter_by_weight32x32(src, src_stride, dst, dst_stride, weight);
- filter_by_weight32x32(src + 32, src_stride, dst + 32,
- dst_stride, weight);
- filter_by_weight32x32(src + src_stride * 32, src_stride,
- dst + dst_stride * 32, dst_stride, weight);
- filter_by_weight32x32(src + src_stride * 32 + 32, src_stride,
- dst + dst_stride * 32 + 32, dst_stride, weight);
-}
-
-static void apply_ifactor(const uint8_t *y, int y_stride, uint8_t *yd,
- int yd_stride, const uint8_t *u, const uint8_t *v,
- int uv_stride, uint8_t *ud, uint8_t *vd,
- int uvd_stride, BLOCK_SIZE block_size,
- int weight) {
- if (block_size == BLOCK_16X16) {
- vp10_filter_by_weight16x16(y, y_stride, yd, yd_stride, weight);
- vp10_filter_by_weight8x8(u, uv_stride, ud, uvd_stride, weight);
- vp10_filter_by_weight8x8(v, uv_stride, vd, uvd_stride, weight);
- } else if (block_size == BLOCK_32X32) {
- filter_by_weight32x32(y, y_stride, yd, yd_stride, weight);
- vp10_filter_by_weight16x16(u, uv_stride, ud, uvd_stride, weight);
- vp10_filter_by_weight16x16(v, uv_stride, vd, uvd_stride, weight);
- } else if (block_size == BLOCK_64X64) {
- filter_by_weight64x64(y, y_stride, yd, yd_stride, weight);
- filter_by_weight32x32(u, uv_stride, ud, uvd_stride, weight);
- filter_by_weight32x32(v, uv_stride, vd, uvd_stride, weight);
- }
-}
-
-// TODO(jackychen): Determine whether replace it with assembly code.
-static void copy_mem8x8(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride) {
- int r;
- for (r = 0; r < 8; r++) {
- memcpy(dst, src, 8);
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static void copy_mem16x16(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride) {
- int r;
- for (r = 0; r < 16; r++) {
- memcpy(dst, src, 16);
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static void copy_mem32x32(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride) {
- copy_mem16x16(src, src_stride, dst, dst_stride);
- copy_mem16x16(src + 16, src_stride, dst + 16, dst_stride);
- copy_mem16x16(src + src_stride * 16, src_stride,
- dst + dst_stride * 16, dst_stride);
- copy_mem16x16(src + src_stride * 16 + 16, src_stride,
- dst + dst_stride * 16 + 16, dst_stride);
-}
-
-void copy_mem64x64(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride) {
- copy_mem32x32(src, src_stride, dst, dst_stride);
- copy_mem32x32(src + 32, src_stride, dst + 32, dst_stride);
- copy_mem32x32(src + src_stride * 32, src_stride,
- dst + src_stride * 32, dst_stride);
- copy_mem32x32(src + src_stride * 32 + 32, src_stride,
- dst + src_stride * 32 + 32, dst_stride);
-}
-
-static void copy_block(const uint8_t *y, const uint8_t *u, const uint8_t *v,
- int y_stride, int uv_stride, uint8_t *yd, uint8_t *ud,
- uint8_t *vd, int yd_stride, int uvd_stride,
- BLOCK_SIZE bs) {
- if (bs == BLOCK_16X16) {
- copy_mem16x16(y, y_stride, yd, yd_stride);
- copy_mem8x8(u, uv_stride, ud, uvd_stride);
- copy_mem8x8(v, uv_stride, vd, uvd_stride);
- } else if (bs == BLOCK_32X32) {
- copy_mem32x32(y, y_stride, yd, yd_stride);
- copy_mem16x16(u, uv_stride, ud, uvd_stride);
- copy_mem16x16(v, uv_stride, vd, uvd_stride);
- } else {
- copy_mem64x64(y, y_stride, yd, yd_stride);
- copy_mem32x32(u, uv_stride, ud, uvd_stride);
- copy_mem32x32(v, uv_stride, vd, uvd_stride);
- }
-}
-
-static void get_thr(BLOCK_SIZE bs, int qdiff, int *sad_thr, int *vdiff_thr) {
- const int adj = qdiff >> MFQE_PRECISION;
- if (bs == BLOCK_16X16) {
- *sad_thr = 7 + adj;
- } else if (bs == BLOCK_32X32) {
- *sad_thr = 6 + adj;
- } else { // BLOCK_64X64
- *sad_thr = 5 + adj;
- }
- *vdiff_thr = 125 + qdiff;
-}
-
-static void mfqe_block(BLOCK_SIZE bs, const uint8_t *y, const uint8_t *u,
- const uint8_t *v, int y_stride, int uv_stride,
- uint8_t *yd, uint8_t *ud, uint8_t *vd, int yd_stride,
- int uvd_stride, int qdiff) {
- int sad, sad_thr, vdiff, vdiff_thr;
- uint32_t sse;
-
- get_thr(bs, qdiff, &sad_thr, &vdiff_thr);
-
- if (bs == BLOCK_16X16) {
- vdiff = (vpx_variance16x16(y, y_stride, yd, yd_stride, &sse) + 128) >> 8;
- sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
- } else if (bs == BLOCK_32X32) {
- vdiff = (vpx_variance32x32(y, y_stride, yd, yd_stride, &sse) + 512) >> 10;
- sad = (vpx_sad32x32(y, y_stride, yd, yd_stride) + 512) >> 10;
- } else /* if (bs == BLOCK_64X64) */ {
- vdiff = (vpx_variance64x64(y, y_stride, yd, yd_stride, &sse) + 2048) >> 12;
- sad = (vpx_sad64x64(y, y_stride, yd, yd_stride) + 2048) >> 12;
- }
-
- // vdiff > sad * 3 means vdiff should not be too small, otherwise,
- // it might be a lighting change in smooth area. When there is a
- // lighting change in smooth area, it is dangerous to do MFQE.
- if (sad > 1 && vdiff > sad * 3) {
- const int weight = 1 << MFQE_PRECISION;
- int ifactor = weight * sad * vdiff / (sad_thr * vdiff_thr);
- // When ifactor equals weight, no MFQE is done.
- if (ifactor > weight) {
- ifactor = weight;
- }
- apply_ifactor(y, y_stride, yd, yd_stride, u, v, uv_stride, ud, vd,
- uvd_stride, bs, ifactor);
- } else {
- // Copy the block from current frame (i.e., no mfqe is done).
- copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd,
- yd_stride, uvd_stride, bs);
- }
-}
-
-static int mfqe_decision(MODE_INFO *mi, BLOCK_SIZE cur_bs) {
- // Check the motion in current block(for inter frame),
- // or check the motion in the correlated block in last frame (for keyframe).
- const int mv_len_square = mi->mbmi.mv[0].as_mv.row *
- mi->mbmi.mv[0].as_mv.row +
- mi->mbmi.mv[0].as_mv.col *
- mi->mbmi.mv[0].as_mv.col;
- const int mv_threshold = 100;
- return mi->mbmi.mode >= NEARESTMV && // Not an intra block
- cur_bs >= BLOCK_16X16 &&
- mv_len_square <= mv_threshold;
-}
-
-// Process each partiton in a super block, recursively.
-static void mfqe_partition(VP10_COMMON *cm, MODE_INFO *mi, BLOCK_SIZE bs,
- const uint8_t *y, const uint8_t *u,
- const uint8_t *v, int y_stride, int uv_stride,
- uint8_t *yd, uint8_t *ud, uint8_t *vd,
- int yd_stride, int uvd_stride) {
- int mi_offset, y_offset, uv_offset;
- const BLOCK_SIZE cur_bs = mi->mbmi.sb_type;
- const int qdiff = cm->base_qindex - cm->postproc_state.last_base_qindex;
- const int bsl = b_width_log2_lookup[bs];
- PARTITION_TYPE partition = partition_lookup[bsl][cur_bs];
- const BLOCK_SIZE subsize = get_subsize(bs, partition);
-
- if (cur_bs < BLOCK_8X8) {
- // If there are blocks smaller than 8x8, it must be on the boundary.
- return;
- }
- // No MFQE on blocks smaller than 16x16
- if (bs == BLOCK_16X16) {
- partition = PARTITION_NONE;
- }
- if (bs == BLOCK_64X64) {
- mi_offset = 4;
- y_offset = 32;
- uv_offset = 16;
- } else {
- mi_offset = 2;
- y_offset = 16;
- uv_offset = 8;
- }
- switch (partition) {
- BLOCK_SIZE mfqe_bs, bs_tmp;
- case PARTITION_HORZ:
- if (bs == BLOCK_64X64) {
- mfqe_bs = BLOCK_64X32;
- bs_tmp = BLOCK_32X32;
- } else {
- mfqe_bs = BLOCK_32X16;
- bs_tmp = BLOCK_16X16;
- }
- if (mfqe_decision(mi, mfqe_bs)) {
- // Do mfqe on the first square partition.
- mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride,
- yd, ud, vd, yd_stride, uvd_stride, qdiff);
- // Do mfqe on the second square partition.
- mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset,
- y_stride, uv_stride, yd + y_offset, ud + uv_offset,
- vd + uv_offset, yd_stride, uvd_stride, qdiff);
- }
- if (mfqe_decision(mi + mi_offset * cm->mi_stride, mfqe_bs)) {
- // Do mfqe on the first square partition.
- mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride,
- v + uv_offset * uv_stride, y_stride, uv_stride,
- yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
- vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff);
- // Do mfqe on the second square partition.
- mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset,
- u + uv_offset * uv_stride + uv_offset,
- v + uv_offset * uv_stride + uv_offset, y_stride,
- uv_stride, yd + y_offset * yd_stride + y_offset,
- ud + uv_offset * uvd_stride + uv_offset,
- vd + uv_offset * uvd_stride + uv_offset,
- yd_stride, uvd_stride, qdiff);
- }
- break;
- case PARTITION_VERT:
- if (bs == BLOCK_64X64) {
- mfqe_bs = BLOCK_32X64;
- bs_tmp = BLOCK_32X32;
- } else {
- mfqe_bs = BLOCK_16X32;
- bs_tmp = BLOCK_16X16;
- }
- if (mfqe_decision(mi, mfqe_bs)) {
- // Do mfqe on the first square partition.
- mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride,
- yd, ud, vd, yd_stride, uvd_stride, qdiff);
- // Do mfqe on the second square partition.
- mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride,
- v + uv_offset * uv_stride, y_stride, uv_stride,
- yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
- vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff);
- }
- if (mfqe_decision(mi + mi_offset, mfqe_bs)) {
- // Do mfqe on the first square partition.
- mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset,
- y_stride, uv_stride, yd + y_offset, ud + uv_offset,
- vd + uv_offset, yd_stride, uvd_stride, qdiff);
- // Do mfqe on the second square partition.
- mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset,
- u + uv_offset * uv_stride + uv_offset,
- v + uv_offset * uv_stride + uv_offset, y_stride,
- uv_stride, yd + y_offset * yd_stride + y_offset,
- ud + uv_offset * uvd_stride + uv_offset,
- vd + uv_offset * uvd_stride + uv_offset,
- yd_stride, uvd_stride, qdiff);
- }
- break;
- case PARTITION_NONE:
- if (mfqe_decision(mi, cur_bs)) {
- // Do mfqe on this partition.
- mfqe_block(cur_bs, y, u, v, y_stride, uv_stride,
- yd, ud, vd, yd_stride, uvd_stride, qdiff);
- } else {
- // Copy the block from current frame(i.e., no mfqe is done).
- copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd,
- yd_stride, uvd_stride, bs);
- }
- break;
- case PARTITION_SPLIT:
- // Recursion on four square partitions, e.g. if bs is 64X64,
- // then look into four 32X32 blocks in it.
- mfqe_partition(cm, mi, subsize, y, u, v, y_stride, uv_stride, yd, ud, vd,
- yd_stride, uvd_stride);
- mfqe_partition(cm, mi + mi_offset, subsize, y + y_offset, u + uv_offset,
- v + uv_offset, y_stride, uv_stride, yd + y_offset,
- ud + uv_offset, vd + uv_offset, yd_stride, uvd_stride);
- mfqe_partition(cm, mi + mi_offset * cm->mi_stride, subsize,
- y + y_offset * y_stride, u + uv_offset * uv_stride,
- v + uv_offset * uv_stride, y_stride, uv_stride,
- yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
- vd + uv_offset * uvd_stride, yd_stride, uvd_stride);
- mfqe_partition(cm, mi + mi_offset * cm->mi_stride + mi_offset,
- subsize, y + y_offset * y_stride + y_offset,
- u + uv_offset * uv_stride + uv_offset,
- v + uv_offset * uv_stride + uv_offset, y_stride,
- uv_stride, yd + y_offset * yd_stride + y_offset,
- ud + uv_offset * uvd_stride + uv_offset,
- vd + uv_offset * uvd_stride + uv_offset,
- yd_stride, uvd_stride);
- break;
- default:
- assert(0);
- }
-}
-
-void vp10_mfqe(VP10_COMMON *cm) {
- int mi_row, mi_col;
- // Current decoded frame.
- const YV12_BUFFER_CONFIG *show = cm->frame_to_show;
- // Last decoded frame and will store the MFQE result.
- YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;
- // Loop through each super block.
- for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) {
- for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
- MODE_INFO *mi;
- MODE_INFO *mi_local = cm->mi + (mi_row * cm->mi_stride + mi_col);
- // Motion Info in last frame.
- MODE_INFO *mi_prev = cm->postproc_state.prev_mi +
- (mi_row * cm->mi_stride + mi_col);
- const uint32_t y_stride = show->y_stride;
- const uint32_t uv_stride = show->uv_stride;
- const uint32_t yd_stride = dest->y_stride;
- const uint32_t uvd_stride = dest->uv_stride;
- const uint32_t row_offset_y = mi_row << 3;
- const uint32_t row_offset_uv = mi_row << 2;
- const uint32_t col_offset_y = mi_col << 3;
- const uint32_t col_offset_uv = mi_col << 2;
- const uint8_t *y = show->y_buffer + row_offset_y * y_stride +
- col_offset_y;
- const uint8_t *u = show->u_buffer + row_offset_uv * uv_stride +
- col_offset_uv;
- const uint8_t *v = show->v_buffer + row_offset_uv * uv_stride +
- col_offset_uv;
- uint8_t *yd = dest->y_buffer + row_offset_y * yd_stride + col_offset_y;
- uint8_t *ud = dest->u_buffer + row_offset_uv * uvd_stride +
- col_offset_uv;
- uint8_t *vd = dest->v_buffer + row_offset_uv * uvd_stride +
- col_offset_uv;
- if (frame_is_intra_only(cm)) {
- mi = mi_prev;
- } else {
- mi = mi_local;
- }
- mfqe_partition(cm, mi, BLOCK_64X64, y, u, v, y_stride, uv_stride, yd, ud,
- vd, yd_stride, uvd_stride);
- }
- }
-}
+++ /dev/null
-/*
- * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP10_COMMON_MFQE_H_
-#define VP10_COMMON_MFQE_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Multiframe Quality Enhancement.
-// The aim for MFQE is to replace pixel blocks in the current frame with
-// the correlated pixel blocks (with higher quality) in the last frame.
-// The replacement can only be taken in stationary blocks by checking
-// the motion of the blocks and other conditions such as the SAD of
-// the current block and correlated block, the variance of the block
-// difference, etc.
-void vp10_mfqe(struct VP10Common *cm);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // VP10_COMMON_MFQE_H_
+++ /dev/null
-/*
- * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "./vp10_rtcd.h"
-#include "vp10/common/onyxc_int.h"
-#include "vpx_dsp/mips/macros_msa.h"
-
-static void filter_by_weight8x8_msa(const uint8_t *src_ptr, int32_t src_stride,
- uint8_t *dst_ptr, int32_t dst_stride,
- int32_t src_weight) {
- int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
- int32_t row;
- uint64_t src0_d, src1_d, dst0_d, dst1_d;
- v16i8 src0 = { 0 };
- v16i8 src1 = { 0 };
- v16i8 dst0 = { 0 };
- v16i8 dst1 = { 0 };
- v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l;
-
- src_wt = __msa_fill_h(src_weight);
- dst_wt = __msa_fill_h(dst_weight);
-
- for (row = 2; row--;) {
- LD2(src_ptr, src_stride, src0_d, src1_d);
- src_ptr += (2 * src_stride);
- LD2(dst_ptr, dst_stride, dst0_d, dst1_d);
- INSERT_D2_SB(src0_d, src1_d, src0);
- INSERT_D2_SB(dst0_d, dst1_d, dst0);
-
- LD2(src_ptr, src_stride, src0_d, src1_d);
- src_ptr += (2 * src_stride);
- LD2((dst_ptr + 2 * dst_stride), dst_stride, dst0_d, dst1_d);
- INSERT_D2_SB(src0_d, src1_d, src1);
- INSERT_D2_SB(dst0_d, dst1_d, dst1);
-
- UNPCK_UB_SH(src0, src_r, src_l);
- UNPCK_UB_SH(dst0, dst_r, dst_l);
- res_h_r = (src_r * src_wt);
- res_h_r += (dst_r * dst_wt);
- res_h_l = (src_l * src_wt);
- res_h_l += (dst_l * dst_wt);
- SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
- dst0 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
- ST8x2_UB(dst0, dst_ptr, dst_stride);
- dst_ptr += (2 * dst_stride);
-
- UNPCK_UB_SH(src1, src_r, src_l);
- UNPCK_UB_SH(dst1, dst_r, dst_l);
- res_h_r = (src_r * src_wt);
- res_h_r += (dst_r * dst_wt);
- res_h_l = (src_l * src_wt);
- res_h_l += (dst_l * dst_wt);
- SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
- dst1 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
- ST8x2_UB(dst1, dst_ptr, dst_stride);
- dst_ptr += (2 * dst_stride);
- }
-}
-
-static void filter_by_weight16x16_msa(const uint8_t *src_ptr,
- int32_t src_stride,
- uint8_t *dst_ptr,
- int32_t dst_stride,
- int32_t src_weight) {
- int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
- int32_t row;
- v16i8 src0, src1, src2, src3, dst0, dst1, dst2, dst3;
- v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l;
-
- src_wt = __msa_fill_h(src_weight);
- dst_wt = __msa_fill_h(dst_weight);
-
- for (row = 4; row--;) {
- LD_SB4(src_ptr, src_stride, src0, src1, src2, src3);
- src_ptr += (4 * src_stride);
- LD_SB4(dst_ptr, dst_stride, dst0, dst1, dst2, dst3);
-
- UNPCK_UB_SH(src0, src_r, src_l);
- UNPCK_UB_SH(dst0, dst_r, dst_l);
- res_h_r = (src_r * src_wt);
- res_h_r += (dst_r * dst_wt);
- res_h_l = (src_l * src_wt);
- res_h_l += (dst_l * dst_wt);
- SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
- PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
- dst_ptr += dst_stride;
-
- UNPCK_UB_SH(src1, src_r, src_l);
- UNPCK_UB_SH(dst1, dst_r, dst_l);
- res_h_r = (src_r * src_wt);
- res_h_r += (dst_r * dst_wt);
- res_h_l = (src_l * src_wt);
- res_h_l += (dst_l * dst_wt);
- SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
- PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
- dst_ptr += dst_stride;
-
- UNPCK_UB_SH(src2, src_r, src_l);
- UNPCK_UB_SH(dst2, dst_r, dst_l);
- res_h_r = (src_r * src_wt);
- res_h_r += (dst_r * dst_wt);
- res_h_l = (src_l * src_wt);
- res_h_l += (dst_l * dst_wt);
- SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
- PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
- dst_ptr += dst_stride;
-
- UNPCK_UB_SH(src3, src_r, src_l);
- UNPCK_UB_SH(dst3, dst_r, dst_l);
- res_h_r = (src_r * src_wt);
- res_h_r += (dst_r * dst_wt);
- res_h_l = (src_l * src_wt);
- res_h_l += (dst_l * dst_wt);
- SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
- PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
- dst_ptr += dst_stride;
- }
-}
-
-void vp10_filter_by_weight8x8_msa(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride,
- int src_weight) {
- filter_by_weight8x8_msa(src, src_stride, dst, dst_stride, src_weight);
-}
-
-void vp10_filter_by_weight16x16_msa(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride,
- int src_weight) {
- filter_by_weight16x16_msa(src, src_stride, dst, dst_stride, src_weight);
-}
#include "vp10/common/quant_common.h"
#include "vp10/common/tile_common.h"
-#if CONFIG_VP9_POSTPROC
-#include "vp10/common/postproc.h"
-#endif
-
#ifdef __cplusplus
extern "C" {
#endif
int new_fb_idx;
-#if CONFIG_VP9_POSTPROC
- YV12_BUFFER_CONFIG post_proc_buffer;
- YV12_BUFFER_CONFIG post_proc_buffer_int;
-#endif
-
FRAME_TYPE last_frame_type; /* last frame's frame type for motion search.*/
FRAME_TYPE frame_type;
vpx_bit_depth_t bit_depth;
vpx_bit_depth_t dequant_bit_depth; // bit_depth of current dequantizer
-#if CONFIG_VP9_POSTPROC
- struct postproc_state postproc_state;
-#endif
-
int error_resilient_mode;
int log2_tile_cols, log2_tile_rows;
+++ /dev/null
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-#include "./vpx_config.h"
-#include "./vpx_scale_rtcd.h"
-#include "./vp10_rtcd.h"
-
-#include "vpx_dsp/vpx_dsp_common.h"
-#include "vpx_ports/mem.h"
-#include "vpx_ports/system_state.h"
-#include "vpx_scale/vpx_scale.h"
-#include "vpx_scale/yv12config.h"
-
-#include "vp10/common/onyxc_int.h"
-#include "vp10/common/postproc.h"
-#include "vp10/common/textblit.h"
-
-#if CONFIG_VP9_POSTPROC
-static const short kernel5[] = {
- 1, 1, 4, 1, 1
-};
-
-const short vp10_rv[] = {
- 8, 5, 2, 2, 8, 12, 4, 9, 8, 3,
- 0, 3, 9, 0, 0, 0, 8, 3, 14, 4,
- 10, 1, 11, 14, 1, 14, 9, 6, 12, 11,
- 8, 6, 10, 0, 0, 8, 9, 0, 3, 14,
- 8, 11, 13, 4, 2, 9, 0, 3, 9, 6,
- 1, 2, 3, 14, 13, 1, 8, 2, 9, 7,
- 3, 3, 1, 13, 13, 6, 6, 5, 2, 7,
- 11, 9, 11, 8, 7, 3, 2, 0, 13, 13,
- 14, 4, 12, 5, 12, 10, 8, 10, 13, 10,
- 4, 14, 4, 10, 0, 8, 11, 1, 13, 7,
- 7, 14, 6, 14, 13, 2, 13, 5, 4, 4,
- 0, 10, 0, 5, 13, 2, 12, 7, 11, 13,
- 8, 0, 4, 10, 7, 2, 7, 2, 2, 5,
- 3, 4, 7, 3, 3, 14, 14, 5, 9, 13,
- 3, 14, 3, 6, 3, 0, 11, 8, 13, 1,
- 13, 1, 12, 0, 10, 9, 7, 6, 2, 8,
- 5, 2, 13, 7, 1, 13, 14, 7, 6, 7,
- 9, 6, 10, 11, 7, 8, 7, 5, 14, 8,
- 4, 4, 0, 8, 7, 10, 0, 8, 14, 11,
- 3, 12, 5, 7, 14, 3, 14, 5, 2, 6,
- 11, 12, 12, 8, 0, 11, 13, 1, 2, 0,
- 5, 10, 14, 7, 8, 0, 4, 11, 0, 8,
- 0, 3, 10, 5, 8, 0, 11, 6, 7, 8,
- 10, 7, 13, 9, 2, 5, 1, 5, 10, 2,
- 4, 3, 5, 6, 10, 8, 9, 4, 11, 14,
- 0, 10, 0, 5, 13, 2, 12, 7, 11, 13,
- 8, 0, 4, 10, 7, 2, 7, 2, 2, 5,
- 3, 4, 7, 3, 3, 14, 14, 5, 9, 13,
- 3, 14, 3, 6, 3, 0, 11, 8, 13, 1,
- 13, 1, 12, 0, 10, 9, 7, 6, 2, 8,
- 5, 2, 13, 7, 1, 13, 14, 7, 6, 7,
- 9, 6, 10, 11, 7, 8, 7, 5, 14, 8,
- 4, 4, 0, 8, 7, 10, 0, 8, 14, 11,
- 3, 12, 5, 7, 14, 3, 14, 5, 2, 6,
- 11, 12, 12, 8, 0, 11, 13, 1, 2, 0,
- 5, 10, 14, 7, 8, 0, 4, 11, 0, 8,
- 0, 3, 10, 5, 8, 0, 11, 6, 7, 8,
- 10, 7, 13, 9, 2, 5, 1, 5, 10, 2,
- 4, 3, 5, 6, 10, 8, 9, 4, 11, 14,
- 3, 8, 3, 7, 8, 5, 11, 4, 12, 3,
- 11, 9, 14, 8, 14, 13, 4, 3, 1, 2,
- 14, 6, 5, 4, 4, 11, 4, 6, 2, 1,
- 5, 8, 8, 12, 13, 5, 14, 10, 12, 13,
- 0, 9, 5, 5, 11, 10, 13, 9, 10, 13,
-};
-
-static const uint8_t q_diff_thresh = 20;
-static const uint8_t last_q_thresh = 170;
-
-void vp10_post_proc_down_and_across_c(const uint8_t *src_ptr,
- uint8_t *dst_ptr,
- int src_pixels_per_line,
- int dst_pixels_per_line,
- int rows,
- int cols,
- int flimit) {
- uint8_t const *p_src;
- uint8_t *p_dst;
- int row, col, i, v, kernel;
- int pitch = src_pixels_per_line;
- uint8_t d[8];
- (void)dst_pixels_per_line;
-
- for (row = 0; row < rows; row++) {
- /* post_proc_down for one row */
- p_src = src_ptr;
- p_dst = dst_ptr;
-
- for (col = 0; col < cols; col++) {
- kernel = 4;
- v = p_src[col];
-
- for (i = -2; i <= 2; i++) {
- if (abs(v - p_src[col + i * pitch]) > flimit)
- goto down_skip_convolve;
-
- kernel += kernel5[2 + i] * p_src[col + i * pitch];
- }
-
- v = (kernel >> 3);
- down_skip_convolve:
- p_dst[col] = v;
- }
-
- /* now post_proc_across */
- p_src = dst_ptr;
- p_dst = dst_ptr;
-
- for (i = 0; i < 8; i++)
- d[i] = p_src[i];
-
- for (col = 0; col < cols; col++) {
- kernel = 4;
- v = p_src[col];
-
- d[col & 7] = v;
-
- for (i = -2; i <= 2; i++) {
- if (abs(v - p_src[col + i]) > flimit)
- goto across_skip_convolve;
-
- kernel += kernel5[2 + i] * p_src[col + i];
- }
-
- d[col & 7] = (kernel >> 3);
- across_skip_convolve:
-
- if (col >= 2)
- p_dst[col - 2] = d[(col - 2) & 7];
- }
-
- /* handle the last two pixels */
- p_dst[col - 2] = d[(col - 2) & 7];
- p_dst[col - 1] = d[(col - 1) & 7];
-
-
- /* next row */
- src_ptr += pitch;
- dst_ptr += pitch;
- }
-}
-
-#if CONFIG_VPX_HIGHBITDEPTH
-void vp10_highbd_post_proc_down_and_across_c(const uint16_t *src_ptr,
- uint16_t *dst_ptr,
- int src_pixels_per_line,
- int dst_pixels_per_line,
- int rows,
- int cols,
- int flimit) {
- uint16_t const *p_src;
- uint16_t *p_dst;
- int row, col, i, v, kernel;
- int pitch = src_pixels_per_line;
- uint16_t d[8];
-
- for (row = 0; row < rows; row++) {
- // post_proc_down for one row.
- p_src = src_ptr;
- p_dst = dst_ptr;
-
- for (col = 0; col < cols; col++) {
- kernel = 4;
- v = p_src[col];
-
- for (i = -2; i <= 2; i++) {
- if (abs(v - p_src[col + i * pitch]) > flimit)
- goto down_skip_convolve;
-
- kernel += kernel5[2 + i] * p_src[col + i * pitch];
- }
-
- v = (kernel >> 3);
-
- down_skip_convolve:
- p_dst[col] = v;
- }
-
- /* now post_proc_across */
- p_src = dst_ptr;
- p_dst = dst_ptr;
-
- for (i = 0; i < 8; i++)
- d[i] = p_src[i];
-
- for (col = 0; col < cols; col++) {
- kernel = 4;
- v = p_src[col];
-
- d[col & 7] = v;
-
- for (i = -2; i <= 2; i++) {
- if (abs(v - p_src[col + i]) > flimit)
- goto across_skip_convolve;
-
- kernel += kernel5[2 + i] * p_src[col + i];
- }
-
- d[col & 7] = (kernel >> 3);
-
- across_skip_convolve:
- if (col >= 2)
- p_dst[col - 2] = d[(col - 2) & 7];
- }
-
- /* handle the last two pixels */
- p_dst[col - 2] = d[(col - 2) & 7];
- p_dst[col - 1] = d[(col - 1) & 7];
-
-
- /* next row */
- src_ptr += pitch;
- dst_ptr += dst_pixels_per_line;
- }
-}
-#endif // CONFIG_VPX_HIGHBITDEPTH
-
-static int q2mbl(int x) {
- if (x < 20) x = 20;
-
- x = 50 + (x - 50) * 10 / 8;
- return x * x / 3;
-}
-
-void vp10_mbpost_proc_across_ip_c(uint8_t *src, int pitch,
- int rows, int cols, int flimit) {
- int r, c, i;
- uint8_t *s = src;
- uint8_t d[16];
-
- for (r = 0; r < rows; r++) {
- int sumsq = 0;
- int sum = 0;
-
- for (i = -8; i <= 6; i++) {
- sumsq += s[i] * s[i];
- sum += s[i];
- d[i + 8] = 0;
- }
-
- for (c = 0; c < cols + 8; c++) {
- int x = s[c + 7] - s[c - 8];
- int y = s[c + 7] + s[c - 8];
-
- sum += x;
- sumsq += x * y;
-
- d[c & 15] = s[c];
-
- if (sumsq * 15 - sum * sum < flimit) {
- d[c & 15] = (8 + sum + s[c]) >> 4;
- }
-
- s[c - 8] = d[(c - 8) & 15];
- }
- s += pitch;
- }
-}
-
-#if CONFIG_VPX_HIGHBITDEPTH
-void vp10_highbd_mbpost_proc_across_ip_c(uint16_t *src, int pitch,
- int rows, int cols, int flimit) {
- int r, c, i;
-
- uint16_t *s = src;
- uint16_t d[16];
-
-
- for (r = 0; r < rows; r++) {
- int sumsq = 0;
- int sum = 0;
-
- for (i = -8; i <= 6; i++) {
- sumsq += s[i] * s[i];
- sum += s[i];
- d[i + 8] = 0;
- }
-
- for (c = 0; c < cols + 8; c++) {
- int x = s[c + 7] - s[c - 8];
- int y = s[c + 7] + s[c - 8];
-
- sum += x;
- sumsq += x * y;
-
- d[c & 15] = s[c];
-
- if (sumsq * 15 - sum * sum < flimit) {
- d[c & 15] = (8 + sum + s[c]) >> 4;
- }
-
- s[c - 8] = d[(c - 8) & 15];
- }
-
- s += pitch;
- }
-}
-#endif // CONFIG_VPX_HIGHBITDEPTH
-
-void vp10_mbpost_proc_down_c(uint8_t *dst, int pitch,
- int rows, int cols, int flimit) {
- int r, c, i;
- const short *rv3 = &vp10_rv[63 & rand()]; // NOLINT
-
- for (c = 0; c < cols; c++) {
- uint8_t *s = &dst[c];
- int sumsq = 0;
- int sum = 0;
- uint8_t d[16];
- const short *rv2 = rv3 + ((c * 17) & 127);
-
- for (i = -8; i <= 6; i++) {
- sumsq += s[i * pitch] * s[i * pitch];
- sum += s[i * pitch];
- }
-
- for (r = 0; r < rows + 8; r++) {
- sumsq += s[7 * pitch] * s[ 7 * pitch] - s[-8 * pitch] * s[-8 * pitch];
- sum += s[7 * pitch] - s[-8 * pitch];
- d[r & 15] = s[0];
-
- if (sumsq * 15 - sum * sum < flimit) {
- d[r & 15] = (rv2[r & 127] + sum + s[0]) >> 4;
- }
-
- s[-8 * pitch] = d[(r - 8) & 15];
- s += pitch;
- }
- }
-}
-
-#if CONFIG_VPX_HIGHBITDEPTH
-void vp10_highbd_mbpost_proc_down_c(uint16_t *dst, int pitch,
- int rows, int cols, int flimit) {
- int r, c, i;
- const int16_t *rv3 = &vp10_rv[63 & rand()]; // NOLINT
-
- for (c = 0; c < cols; c++) {
- uint16_t *s = &dst[c];
- int sumsq = 0;
- int sum = 0;
- uint16_t d[16];
- const int16_t *rv2 = rv3 + ((c * 17) & 127);
-
- for (i = -8; i <= 6; i++) {
- sumsq += s[i * pitch] * s[i * pitch];
- sum += s[i * pitch];
- }
-
- for (r = 0; r < rows + 8; r++) {
- sumsq += s[7 * pitch] * s[ 7 * pitch] - s[-8 * pitch] * s[-8 * pitch];
- sum += s[7 * pitch] - s[-8 * pitch];
- d[r & 15] = s[0];
-
- if (sumsq * 15 - sum * sum < flimit) {
- d[r & 15] = (rv2[r & 127] + sum + s[0]) >> 4;
- }
-
- s[-8 * pitch] = d[(r - 8) & 15];
- s += pitch;
- }
- }
-}
-#endif // CONFIG_VPX_HIGHBITDEPTH
-
-static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *post,
- int q,
- int low_var_thresh,
- int flag) {
- double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065;
- int ppl = (int)(level + .5);
- (void) low_var_thresh;
- (void) flag;
-
-#if CONFIG_VPX_HIGHBITDEPTH
- if (source->flags & YV12_FLAG_HIGHBITDEPTH) {
- vp10_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(source->y_buffer),
- CONVERT_TO_SHORTPTR(post->y_buffer),
- source->y_stride, post->y_stride,
- source->y_height, source->y_width,
- ppl);
-
- vp10_highbd_mbpost_proc_across_ip(CONVERT_TO_SHORTPTR(post->y_buffer),
- post->y_stride, post->y_height,
- post->y_width, q2mbl(q));
-
- vp10_highbd_mbpost_proc_down(CONVERT_TO_SHORTPTR(post->y_buffer),
- post->y_stride, post->y_height,
- post->y_width, q2mbl(q));
-
- vp10_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(source->u_buffer),
- CONVERT_TO_SHORTPTR(post->u_buffer),
- source->uv_stride, post->uv_stride,
- source->uv_height, source->uv_width,
- ppl);
- vp10_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(source->v_buffer),
- CONVERT_TO_SHORTPTR(post->v_buffer),
- source->uv_stride, post->uv_stride,
- source->uv_height, source->uv_width,
- ppl);
- } else {
- vp10_post_proc_down_and_across(source->y_buffer, post->y_buffer,
- source->y_stride, post->y_stride,
- source->y_height, source->y_width, ppl);
-
- vp10_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height,
- post->y_width, q2mbl(q));
-
- vp10_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height,
- post->y_width, q2mbl(q));
-
- vp10_post_proc_down_and_across(source->u_buffer, post->u_buffer,
- source->uv_stride, post->uv_stride,
- source->uv_height, source->uv_width, ppl);
- vp10_post_proc_down_and_across(source->v_buffer, post->v_buffer,
- source->uv_stride, post->uv_stride,
- source->uv_height, source->uv_width, ppl);
- }
-#else
- vp10_post_proc_down_and_across(source->y_buffer, post->y_buffer,
- source->y_stride, post->y_stride,
- source->y_height, source->y_width, ppl);
-
- vp10_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height,
- post->y_width, q2mbl(q));
-
- vp10_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height,
- post->y_width, q2mbl(q));
-
- vp10_post_proc_down_and_across(source->u_buffer, post->u_buffer,
- source->uv_stride, post->uv_stride,
- source->uv_height, source->uv_width, ppl);
- vp10_post_proc_down_and_across(source->v_buffer, post->v_buffer,
- source->uv_stride, post->uv_stride,
- source->uv_height, source->uv_width, ppl);
-#endif // CONFIG_VPX_HIGHBITDEPTH
-}
-
-void vp10_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
- int q) {
- const int ppl = (int)(6.0e-05 * q * q * q - 0.0067 * q * q + 0.306 * q
- + 0.0065 + 0.5);
- int i;
-
- const uint8_t *const srcs[3] = {src->y_buffer, src->u_buffer, src->v_buffer};
- const int src_strides[3] = {src->y_stride, src->uv_stride, src->uv_stride};
- const int src_widths[3] = {src->y_width, src->uv_width, src->uv_width};
- const int src_heights[3] = {src->y_height, src->uv_height, src->uv_height};
-
- uint8_t *const dsts[3] = {dst->y_buffer, dst->u_buffer, dst->v_buffer};
- const int dst_strides[3] = {dst->y_stride, dst->uv_stride, dst->uv_stride};
-
- for (i = 0; i < MAX_MB_PLANE; ++i) {
-#if CONFIG_VPX_HIGHBITDEPTH
- assert((src->flags & YV12_FLAG_HIGHBITDEPTH) ==
- (dst->flags & YV12_FLAG_HIGHBITDEPTH));
- if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
- vp10_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(srcs[i]),
- CONVERT_TO_SHORTPTR(dsts[i]),
- src_strides[i], dst_strides[i],
- src_heights[i], src_widths[i], ppl);
- } else {
- vp10_post_proc_down_and_across(srcs[i], dsts[i],
- src_strides[i], dst_strides[i],
- src_heights[i], src_widths[i], ppl);
- }
-#else
- vp10_post_proc_down_and_across(srcs[i], dsts[i],
- src_strides[i], dst_strides[i],
- src_heights[i], src_widths[i], ppl);
-#endif // CONFIG_VPX_HIGHBITDEPTH
- }
-}
-
-void vp10_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
- int q) {
- const int ppl = (int)(6.0e-05 * q * q * q - 0.0067 * q * q + 0.306 * q
- + 0.0065 + 0.5);
- int i;
-
- const uint8_t *const srcs[3] = {src->y_buffer, src->u_buffer, src->v_buffer};
- const int src_strides[3] = {src->y_stride, src->uv_stride, src->uv_stride};
- const int src_widths[3] = {src->y_width, src->uv_width, src->uv_width};
- const int src_heights[3] = {src->y_height, src->uv_height, src->uv_height};
-
- uint8_t *const dsts[3] = {dst->y_buffer, dst->u_buffer, dst->v_buffer};
- const int dst_strides[3] = {dst->y_stride, dst->uv_stride, dst->uv_stride};
-
- for (i = 0; i < MAX_MB_PLANE; ++i) {
- const int src_stride = src_strides[i];
- const int src_width = src_widths[i] - 4;
- const int src_height = src_heights[i] - 4;
- const int dst_stride = dst_strides[i];
-
-#if CONFIG_VPX_HIGHBITDEPTH
- assert((src->flags & YV12_FLAG_HIGHBITDEPTH) ==
- (dst->flags & YV12_FLAG_HIGHBITDEPTH));
- if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
- const uint16_t *const src_plane = CONVERT_TO_SHORTPTR(
- srcs[i] + 2 * src_stride + 2);
- uint16_t *const dst_plane = CONVERT_TO_SHORTPTR(
- dsts[i] + 2 * dst_stride + 2);
- vp10_highbd_post_proc_down_and_across(src_plane, dst_plane, src_stride,
- dst_stride, src_height, src_width,
- ppl);
- } else {
- const uint8_t *const src_plane = srcs[i] + 2 * src_stride + 2;
- uint8_t *const dst_plane = dsts[i] + 2 * dst_stride + 2;
-
- vp10_post_proc_down_and_across(src_plane, dst_plane, src_stride,
- dst_stride, src_height, src_width, ppl);
- }
-#else
- const uint8_t *const src_plane = srcs[i] + 2 * src_stride + 2;
- uint8_t *const dst_plane = dsts[i] + 2 * dst_stride + 2;
- vp10_post_proc_down_and_across(src_plane, dst_plane, src_stride, dst_stride,
- src_height, src_width, ppl);
-#endif
- }
-}
-
-static double gaussian(double sigma, double mu, double x) {
- return 1 / (sigma * sqrt(2.0 * 3.14159265)) *
- (exp(-(x - mu) * (x - mu) / (2 * sigma * sigma)));
-}
-
-static void fillrd(struct postproc_state *state, int q, int a) {
- char char_dist[300];
-
- double sigma;
- int ai = a, qi = q, i;
-
- vpx_clear_system_state();
-
- sigma = ai + .5 + .6 * (63 - qi) / 63.0;
-
- /* set up a lookup table of 256 entries that matches
- * a gaussian distribution with sigma determined by q.
- */
- {
- int next, j;
-
- next = 0;
-
- for (i = -32; i < 32; i++) {
- int a_i = (int)(0.5 + 256 * gaussian(sigma, 0, i));
-
- if (a_i) {
- for (j = 0; j < a_i; j++) {
- char_dist[next + j] = (char) i;
- }
-
- next = next + j;
- }
- }
-
- for (; next < 256; next++)
- char_dist[next] = 0;
- }
-
- for (i = 0; i < 3072; i++) {
- state->noise[i] = char_dist[rand() & 0xff]; // NOLINT
- }
-
- for (i = 0; i < 16; i++) {
- state->blackclamp[i] = -char_dist[0];
- state->whiteclamp[i] = -char_dist[0];
- state->bothclamp[i] = -2 * char_dist[0];
- }
-
- state->last_q = q;
- state->last_noise = a;
-}
-
-void vp10_plane_add_noise_c(uint8_t *start, char *noise,
- char blackclamp[16],
- char whiteclamp[16],
- char bothclamp[16],
- unsigned int width, unsigned int height, int pitch) {
- unsigned int i, j;
-
- // TODO(jbb): why does simd code use both but c doesn't, normalize and
- // fix..
- (void) bothclamp;
- for (i = 0; i < height; i++) {
- uint8_t *pos = start + i * pitch;
- char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT
-
- for (j = 0; j < width; j++) {
- if (pos[j] < blackclamp[0])
- pos[j] = blackclamp[0];
-
- if (pos[j] > 255 + whiteclamp[0])
- pos[j] = 255 + whiteclamp[0];
-
- pos[j] += ref[j];
- }
- }
-}
-
-static void swap_mi_and_prev_mi(VP10_COMMON *cm) {
- // Current mip will be the prev_mip for the next frame.
- MODE_INFO *temp = cm->postproc_state.prev_mip;
- cm->postproc_state.prev_mip = cm->mip;
- cm->mip = temp;
-
- // Update the upper left visible macroblock ptrs.
- cm->mi = cm->mip + cm->mi_stride + 1;
- cm->postproc_state.prev_mi = cm->postproc_state.prev_mip + cm->mi_stride + 1;
-}
-
-int vp10_post_proc_frame(struct VP10Common *cm,
- YV12_BUFFER_CONFIG *dest, vp10_ppflags_t *ppflags) {
- const int q = VPXMIN(105, cm->lf.filter_level * 2);
- const int flags = ppflags->post_proc_flag;
- YV12_BUFFER_CONFIG *const ppbuf = &cm->post_proc_buffer;
- struct postproc_state *const ppstate = &cm->postproc_state;
-
- if (!cm->frame_to_show)
- return -1;
-
- if (!flags) {
- *dest = *cm->frame_to_show;
- return 0;
- }
-
- vpx_clear_system_state();
-
- // Alloc memory for prev_mip in the first frame.
- if (cm->current_video_frame == 1) {
- cm->postproc_state.last_base_qindex = cm->base_qindex;
- cm->postproc_state.last_frame_valid = 1;
- ppstate->prev_mip = vpx_calloc(cm->mi_alloc_size, sizeof(*cm->mip));
- if (!ppstate->prev_mip) {
- return 1;
- }
- ppstate->prev_mi = ppstate->prev_mip + cm->mi_stride + 1;
- memset(ppstate->prev_mip, 0,
- cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip));
- }
-
- // Allocate post_proc_buffer_int if needed.
- if ((flags & VP9D_MFQE) && !cm->post_proc_buffer_int.buffer_alloc) {
- if ((flags & VP9D_DEMACROBLOCK) || (flags & VP9D_DEBLOCK)) {
- const int width = ALIGN_POWER_OF_TWO(cm->width, 4);
- const int height = ALIGN_POWER_OF_TWO(cm->height, 4);
-
- if (vpx_alloc_frame_buffer(&cm->post_proc_buffer_int, width, height,
- cm->subsampling_x, cm->subsampling_y,
-#if CONFIG_VPX_HIGHBITDEPTH
- cm->use_highbitdepth,
-#endif // CONFIG_VPX_HIGHBITDEPTH
- VPX_ENC_BORDER_IN_PIXELS,
- cm->byte_alignment) < 0) {
- vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
- "Failed to allocate MFQE framebuffer");
- }
-
- // Ensure that postproc is set to all 0s so that post proc
- // doesn't pull random data in from edge.
- memset(cm->post_proc_buffer_int.buffer_alloc, 128,
- cm->post_proc_buffer.frame_size);
- }
- }
-
- if (vpx_realloc_frame_buffer(&cm->post_proc_buffer, cm->width, cm->height,
- cm->subsampling_x, cm->subsampling_y,
-#if CONFIG_VPX_HIGHBITDEPTH
- cm->use_highbitdepth,
-#endif
- VPX_DEC_BORDER_IN_PIXELS, cm->byte_alignment,
- NULL, NULL, NULL) < 0)
- vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
- "Failed to allocate post-processing buffer");
-
- if ((flags & VP9D_MFQE) && cm->current_video_frame >= 2 &&
- cm->postproc_state.last_frame_valid && cm->bit_depth == 8 &&
- cm->postproc_state.last_base_qindex <= last_q_thresh &&
- cm->base_qindex - cm->postproc_state.last_base_qindex >= q_diff_thresh) {
- vp10_mfqe(cm);
- // TODO(jackychen): Consider whether enable deblocking by default
- // if mfqe is enabled. Need to take both the quality and the speed
- // into consideration.
- if ((flags & VP9D_DEMACROBLOCK) || (flags & VP9D_DEBLOCK)) {
- vp8_yv12_copy_frame(ppbuf, &cm->post_proc_buffer_int);
- }
- if ((flags & VP9D_DEMACROBLOCK) && cm->post_proc_buffer_int.buffer_alloc) {
- deblock_and_de_macro_block(&cm->post_proc_buffer_int, ppbuf,
- q + (ppflags->deblocking_level - 5) * 10,
- 1, 0);
- } else if (flags & VP9D_DEBLOCK) {
- vp10_deblock(&cm->post_proc_buffer_int, ppbuf, q);
- } else {
- vp8_yv12_copy_frame(&cm->post_proc_buffer_int, ppbuf);
- }
- } else if (flags & VP9D_DEMACROBLOCK) {
- deblock_and_de_macro_block(cm->frame_to_show, ppbuf,
- q + (ppflags->deblocking_level - 5) * 10, 1, 0);
- } else if (flags & VP9D_DEBLOCK) {
- vp10_deblock(cm->frame_to_show, ppbuf, q);
- } else {
- vp8_yv12_copy_frame(cm->frame_to_show, ppbuf);
- }
-
- cm->postproc_state.last_base_qindex = cm->base_qindex;
- cm->postproc_state.last_frame_valid = 1;
-
- if (flags & VP9D_ADDNOISE) {
- const int noise_level = ppflags->noise_level;
- if (ppstate->last_q != q ||
- ppstate->last_noise != noise_level) {
- fillrd(ppstate, 63 - q, noise_level);
- }
-
- vp10_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
- ppstate->whiteclamp, ppstate->bothclamp,
- ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride);
- }
-
- *dest = *ppbuf;
-
- /* handle problem with extending borders */
- dest->y_width = cm->width;
- dest->y_height = cm->height;
- dest->uv_width = dest->y_width >> cm->subsampling_x;
- dest->uv_height = dest->y_height >> cm->subsampling_y;
-
- swap_mi_and_prev_mi(cm);
- return 0;
-}
-#endif // CONFIG_VP9_POSTPROC
+++ /dev/null
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef VP10_COMMON_POSTPROC_H_
-#define VP10_COMMON_POSTPROC_H_
-
-#include "vpx_ports/mem.h"
-#include "vpx_scale/yv12config.h"
-#include "vp10/common/blockd.h"
-#include "vp10/common/mfqe.h"
-#include "vp10/common/ppflags.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct postproc_state {
- int last_q;
- int last_noise;
- char noise[3072];
- int last_base_qindex;
- int last_frame_valid;
- MODE_INFO *prev_mip;
- MODE_INFO *prev_mi;
- DECLARE_ALIGNED(16, char, blackclamp[16]);
- DECLARE_ALIGNED(16, char, whiteclamp[16]);
- DECLARE_ALIGNED(16, char, bothclamp[16]);
-};
-
-struct VP10Common;
-
-#define MFQE_PRECISION 4
-
-int vp10_post_proc_frame(struct VP10Common *cm,
- YV12_BUFFER_CONFIG *dest, vp10_ppflags_t *flags);
-
-void vp10_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q);
-
-void vp10_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // VP10_COMMON_POSTPROC_H_
+++ /dev/null
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP10_COMMON_PPFLAGS_H_
-#define VP10_COMMON_PPFLAGS_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-enum {
- VP9D_NOFILTERING = 0,
- VP9D_DEBLOCK = 1 << 0,
- VP9D_DEMACROBLOCK = 1 << 1,
- VP9D_ADDNOISE = 1 << 2,
- VP9D_DEBUG_TXT_FRAME_INFO = 1 << 3,
- VP9D_DEBUG_TXT_MBLK_MODES = 1 << 4,
- VP9D_DEBUG_TXT_DC_DIFF = 1 << 5,
- VP9D_DEBUG_TXT_RATE_INFO = 1 << 6,
- VP9D_DEBUG_DRAW_MV = 1 << 7,
- VP9D_DEBUG_CLR_BLK_MODES = 1 << 8,
- VP9D_DEBUG_CLR_FRM_REF_BLKS = 1 << 9,
- VP9D_MFQE = 1 << 10
-};
-
-typedef struct {
- int post_proc_flag;
- int deblocking_level;
- int noise_level;
-} vp10_ppflags_t;
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // VP10_COMMON_PPFLAGS_H_
$avx2_x86_64 = 'avx2';
}
-#
-# post proc
-#
-if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
-add_proto qw/void vp10_mbpost_proc_down/, "uint8_t *dst, int pitch, int rows, int cols, int flimit";
-specialize qw/vp10_mbpost_proc_down sse2/;
-$vp10_mbpost_proc_down_sse2=vp10_mbpost_proc_down_xmm;
-
-add_proto qw/void vp10_mbpost_proc_across_ip/, "uint8_t *src, int pitch, int rows, int cols, int flimit";
-specialize qw/vp10_mbpost_proc_across_ip sse2/;
-$vp10_mbpost_proc_across_ip_sse2=vp10_mbpost_proc_across_ip_xmm;
-
-add_proto qw/void vp10_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
-specialize qw/vp10_post_proc_down_and_across sse2/;
-$vp10_post_proc_down_and_across_sse2=vp10_post_proc_down_and_across_xmm;
-
-add_proto qw/void vp10_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
-specialize qw/vp10_plane_add_noise sse2/;
-$vp10_plane_add_noise_sse2=vp10_plane_add_noise_wmt;
-
-add_proto qw/void vp10_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
-specialize qw/vp10_filter_by_weight16x16 sse2 msa/;
-
-add_proto qw/void vp10_filter_by_weight8x8/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
-specialize qw/vp10_filter_by_weight8x8 sse2 msa/;
-}
-
#
# dct
#
+++ /dev/null
-;
-; Copyright (c) 2015 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-; This file is a duplicate of mfqe_sse2.asm in VP8.
-; TODO(jackychen): Find a way to fix the duplicate.
-%include "vpx_ports/x86_abi_support.asm"
-
-;void vp10_filter_by_weight16x16_sse2
-;(
-; unsigned char *src,
-; int src_stride,
-; unsigned char *dst,
-; int dst_stride,
-; int src_weight
-;)
-global sym(vp10_filter_by_weight16x16_sse2) PRIVATE
-sym(vp10_filter_by_weight16x16_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- SAVE_XMM 6
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- movd xmm0, arg(4) ; src_weight
- pshuflw xmm0, xmm0, 0x0 ; replicate to all low words
- punpcklqdq xmm0, xmm0 ; replicate to all hi words
-
- movdqa xmm1, [GLOBAL(tMFQE)]
- psubw xmm1, xmm0 ; dst_weight
-
- mov rax, arg(0) ; src
- mov rsi, arg(1) ; src_stride
- mov rdx, arg(2) ; dst
- mov rdi, arg(3) ; dst_stride
-
- mov rcx, 16 ; loop count
- pxor xmm6, xmm6
-
-.combine
- movdqa xmm2, [rax]
- movdqa xmm4, [rdx]
- add rax, rsi
-
- ; src * src_weight
- movdqa xmm3, xmm2
- punpcklbw xmm2, xmm6
- punpckhbw xmm3, xmm6
- pmullw xmm2, xmm0
- pmullw xmm3, xmm0
-
- ; dst * dst_weight
- movdqa xmm5, xmm4
- punpcklbw xmm4, xmm6
- punpckhbw xmm5, xmm6
- pmullw xmm4, xmm1
- pmullw xmm5, xmm1
-
- ; sum, round and shift
- paddw xmm2, xmm4
- paddw xmm3, xmm5
- paddw xmm2, [GLOBAL(tMFQE_round)]
- paddw xmm3, [GLOBAL(tMFQE_round)]
- psrlw xmm2, 4
- psrlw xmm3, 4
-
- packuswb xmm2, xmm3
- movdqa [rdx], xmm2
- add rdx, rdi
-
- dec rcx
- jnz .combine
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
-
- ret
-
-;void vp10_filter_by_weight8x8_sse2
-;(
-; unsigned char *src,
-; int src_stride,
-; unsigned char *dst,
-; int dst_stride,
-; int src_weight
-;)
-global sym(vp10_filter_by_weight8x8_sse2) PRIVATE
-sym(vp10_filter_by_weight8x8_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- movd xmm0, arg(4) ; src_weight
- pshuflw xmm0, xmm0, 0x0 ; replicate to all low words
- punpcklqdq xmm0, xmm0 ; replicate to all hi words
-
- movdqa xmm1, [GLOBAL(tMFQE)]
- psubw xmm1, xmm0 ; dst_weight
-
- mov rax, arg(0) ; src
- mov rsi, arg(1) ; src_stride
- mov rdx, arg(2) ; dst
- mov rdi, arg(3) ; dst_stride
-
- mov rcx, 8 ; loop count
- pxor xmm4, xmm4
-
-.combine
- movq xmm2, [rax]
- movq xmm3, [rdx]
- add rax, rsi
-
- ; src * src_weight
- punpcklbw xmm2, xmm4
- pmullw xmm2, xmm0
-
- ; dst * dst_weight
- punpcklbw xmm3, xmm4
- pmullw xmm3, xmm1
-
- ; sum, round and shift
- paddw xmm2, xmm3
- paddw xmm2, [GLOBAL(tMFQE_round)]
- psrlw xmm2, 4
-
- packuswb xmm2, xmm4
- movq [rdx], xmm2
- add rdx, rdi
-
- dec rcx
- jnz .combine
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
-
- ret
-
-;void vp10_variance_and_sad_16x16_sse2 | arg
-;(
-; unsigned char *src1, 0
-; int stride1, 1
-; unsigned char *src2, 2
-; int stride2, 3
-; unsigned int *variance, 4
-; unsigned int *sad, 5
-;)
-global sym(vp10_variance_and_sad_16x16_sse2) PRIVATE
-sym(vp10_variance_and_sad_16x16_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rax, arg(0) ; src1
- mov rcx, arg(1) ; stride1
- mov rdx, arg(2) ; src2
- mov rdi, arg(3) ; stride2
-
- mov rsi, 16 ; block height
-
- ; Prep accumulator registers
- pxor xmm3, xmm3 ; SAD
- pxor xmm4, xmm4 ; sum of src2
- pxor xmm5, xmm5 ; sum of src2^2
-
- ; Because we're working with the actual output frames
- ; we can't depend on any kind of data alignment.
-.accumulate
- movdqa xmm0, [rax] ; src1
- movdqa xmm1, [rdx] ; src2
- add rax, rcx ; src1 + stride1
- add rdx, rdi ; src2 + stride2
-
- ; SAD(src1, src2)
- psadbw xmm0, xmm1
- paddusw xmm3, xmm0
-
- ; SUM(src2)
- pxor xmm2, xmm2
- psadbw xmm2, xmm1 ; sum src2 by misusing SAD against 0
- paddusw xmm4, xmm2
-
- ; pmaddubsw would be ideal if it took two unsigned values. instead,
- ; it expects a signed and an unsigned value. so instead we zero extend
- ; and operate on words.
- pxor xmm2, xmm2
- movdqa xmm0, xmm1
- punpcklbw xmm0, xmm2
- punpckhbw xmm1, xmm2
- pmaddwd xmm0, xmm0
- pmaddwd xmm1, xmm1
- paddd xmm5, xmm0
- paddd xmm5, xmm1
-
- sub rsi, 1
- jnz .accumulate
-
- ; phaddd only operates on adjacent double words.
- ; Finalize SAD and store
- movdqa xmm0, xmm3
- psrldq xmm0, 8
- paddusw xmm0, xmm3
- paddd xmm0, [GLOBAL(t128)]
- psrld xmm0, 8
-
- mov rax, arg(5)
- movd [rax], xmm0
-
- ; Accumulate sum of src2
- movdqa xmm0, xmm4
- psrldq xmm0, 8
- paddusw xmm0, xmm4
- ; Square src2. Ignore high value
- pmuludq xmm0, xmm0
- psrld xmm0, 8
-
- ; phaddw could be used to sum adjacent values but we want
- ; all the values summed. promote to doubles, accumulate,
- ; shift and sum
- pxor xmm2, xmm2
- movdqa xmm1, xmm5
- punpckldq xmm1, xmm2
- punpckhdq xmm5, xmm2
- paddd xmm1, xmm5
- movdqa xmm2, xmm1
- psrldq xmm1, 8
- paddd xmm1, xmm2
-
- psubd xmm1, xmm0
-
- ; (variance + 128) >> 8
- paddd xmm1, [GLOBAL(t128)]
- psrld xmm1, 8
- mov rax, arg(4)
-
- movd [rax], xmm1
-
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-SECTION_RODATA
-align 16
-t128:
-%ifndef __NASM_VER__
- ddq 128
-%elif CONFIG_BIG_ENDIAN
- dq 0, 128
-%else
- dq 128, 0
-%endif
-align 16
-tMFQE: ; 1 << MFQE_PRECISION
- times 8 dw 0x10
-align 16
-tMFQE_round: ; 1 << (MFQE_PRECISION - 1)
- times 8 dw 0x08
+++ /dev/null
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-;void vp10_post_proc_down_and_across_xmm
-;(
-; unsigned char *src_ptr,
-; unsigned char *dst_ptr,
-; int src_pixels_per_line,
-; int dst_pixels_per_line,
-; int rows,
-; int cols,
-; int flimit
-;)
-global sym(vp10_post_proc_down_and_across_xmm) PRIVATE
-sym(vp10_post_proc_down_and_across_xmm):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
-%if ABI_IS_32BIT=1 && CONFIG_PIC=1
- ALIGN_STACK 16, rax
- ; move the global rd onto the stack, since we don't have enough registers
- ; to do PIC addressing
- movdqa xmm0, [GLOBAL(rd42)]
- sub rsp, 16
- movdqa [rsp], xmm0
-%define RD42 [rsp]
-%else
-%define RD42 [GLOBAL(rd42)]
-%endif
-
-
- movd xmm2, dword ptr arg(6) ;flimit
- punpcklwd xmm2, xmm2
- punpckldq xmm2, xmm2
- punpcklqdq xmm2, xmm2
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(1) ;dst_ptr
-
- movsxd rcx, DWORD PTR arg(4) ;rows
- movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line ; destination pitch?
- pxor xmm0, xmm0 ; mm0 = 00000000
-
-.nextrow:
-
- xor rdx, rdx ; clear out rdx for use as loop counter
-.nextcol:
- movq xmm3, QWORD PTR [rsi] ; mm4 = r0 p0..p7
- punpcklbw xmm3, xmm0 ; mm3 = p0..p3
- movdqa xmm1, xmm3 ; mm1 = p0..p3
- psllw xmm3, 2 ;
-
- movq xmm5, QWORD PTR [rsi + rax] ; mm4 = r1 p0..p7
- punpcklbw xmm5, xmm0 ; mm5 = r1 p0..p3
- paddusw xmm3, xmm5 ; mm3 += mm6
-
- ; thresholding
- movdqa xmm7, xmm1 ; mm7 = r0 p0..p3
- psubusw xmm7, xmm5 ; mm7 = r0 p0..p3 - r1 p0..p3
- psubusw xmm5, xmm1 ; mm5 = r1 p0..p3 - r0 p0..p3
- paddusw xmm7, xmm5 ; mm7 = abs(r0 p0..p3 - r1 p0..p3)
- pcmpgtw xmm7, xmm2
-
- movq xmm5, QWORD PTR [rsi + 2*rax] ; mm4 = r2 p0..p7
- punpcklbw xmm5, xmm0 ; mm5 = r2 p0..p3
- paddusw xmm3, xmm5 ; mm3 += mm5
-
- ; thresholding
- movdqa xmm6, xmm1 ; mm6 = r0 p0..p3
- psubusw xmm6, xmm5 ; mm6 = r0 p0..p3 - r2 p0..p3
- psubusw xmm5, xmm1 ; mm5 = r2 p0..p3 - r2 p0..p3
- paddusw xmm6, xmm5 ; mm6 = abs(r0 p0..p3 - r2 p0..p3)
- pcmpgtw xmm6, xmm2
- por xmm7, xmm6 ; accumulate thresholds
-
-
- neg rax
- movq xmm5, QWORD PTR [rsi+2*rax] ; mm4 = r-2 p0..p7
- punpcklbw xmm5, xmm0 ; mm5 = r-2 p0..p3
- paddusw xmm3, xmm5 ; mm3 += mm5
-
- ; thresholding
- movdqa xmm6, xmm1 ; mm6 = r0 p0..p3
- psubusw xmm6, xmm5 ; mm6 = p0..p3 - r-2 p0..p3
- psubusw xmm5, xmm1 ; mm5 = r-2 p0..p3 - p0..p3
- paddusw xmm6, xmm5 ; mm6 = abs(r0 p0..p3 - r-2 p0..p3)
- pcmpgtw xmm6, xmm2
- por xmm7, xmm6 ; accumulate thresholds
-
- movq xmm4, QWORD PTR [rsi+rax] ; mm4 = r-1 p0..p7
- punpcklbw xmm4, xmm0 ; mm4 = r-1 p0..p3
- paddusw xmm3, xmm4 ; mm3 += mm5
-
- ; thresholding
- movdqa xmm6, xmm1 ; mm6 = r0 p0..p3
- psubusw xmm6, xmm4 ; mm6 = p0..p3 - r-2 p0..p3
- psubusw xmm4, xmm1 ; mm5 = r-1 p0..p3 - p0..p3
- paddusw xmm6, xmm4 ; mm6 = abs(r0 p0..p3 - r-1 p0..p3)
- pcmpgtw xmm6, xmm2
- por xmm7, xmm6 ; accumulate thresholds
-
-
- paddusw xmm3, RD42 ; mm3 += round value
- psraw xmm3, 3 ; mm3 /= 8
-
- pand xmm1, xmm7 ; mm1 select vals > thresh from source
- pandn xmm7, xmm3 ; mm7 select vals < thresh from blurred result
- paddusw xmm1, xmm7 ; combination
-
- packuswb xmm1, xmm0 ; pack to bytes
- movq QWORD PTR [rdi], xmm1 ;
-
- neg rax ; pitch is positive
- add rsi, 8
- add rdi, 8
-
- add rdx, 8
- cmp edx, dword arg(5) ;cols
-
- jl .nextcol
-
- ; done with the all cols, start the across filtering in place
- sub rsi, rdx
- sub rdi, rdx
-
- xor rdx, rdx
- movq mm0, QWORD PTR [rdi-8];
-
-.acrossnextcol:
- movq xmm7, QWORD PTR [rdi +rdx -2]
- movd xmm4, DWORD PTR [rdi +rdx +6]
-
- pslldq xmm4, 8
- por xmm4, xmm7
-
- movdqa xmm3, xmm4
- psrldq xmm3, 2
- punpcklbw xmm3, xmm0 ; mm3 = p0..p3
- movdqa xmm1, xmm3 ; mm1 = p0..p3
- psllw xmm3, 2
-
-
- movdqa xmm5, xmm4
- psrldq xmm5, 3
- punpcklbw xmm5, xmm0 ; mm5 = p1..p4
- paddusw xmm3, xmm5 ; mm3 += mm6
-
- ; thresholding
- movdqa xmm7, xmm1 ; mm7 = p0..p3
- psubusw xmm7, xmm5 ; mm7 = p0..p3 - p1..p4
- psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3
- paddusw xmm7, xmm5 ; mm7 = abs(p0..p3 - p1..p4)
- pcmpgtw xmm7, xmm2
-
- movdqa xmm5, xmm4
- psrldq xmm5, 4
- punpcklbw xmm5, xmm0 ; mm5 = p2..p5
- paddusw xmm3, xmm5 ; mm3 += mm5
-
- ; thresholding
- movdqa xmm6, xmm1 ; mm6 = p0..p3
- psubusw xmm6, xmm5 ; mm6 = p0..p3 - p1..p4
- psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3
- paddusw xmm6, xmm5 ; mm6 = abs(p0..p3 - p1..p4)
- pcmpgtw xmm6, xmm2
- por xmm7, xmm6 ; accumulate thresholds
-
-
- movdqa xmm5, xmm4 ; mm5 = p-2..p5
- punpcklbw xmm5, xmm0 ; mm5 = p-2..p1
- paddusw xmm3, xmm5 ; mm3 += mm5
-
- ; thresholding
- movdqa xmm6, xmm1 ; mm6 = p0..p3
- psubusw xmm6, xmm5 ; mm6 = p0..p3 - p1..p4
- psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3
- paddusw xmm6, xmm5 ; mm6 = abs(p0..p3 - p1..p4)
- pcmpgtw xmm6, xmm2
- por xmm7, xmm6 ; accumulate thresholds
-
- psrldq xmm4, 1 ; mm4 = p-1..p5
- punpcklbw xmm4, xmm0 ; mm4 = p-1..p2
- paddusw xmm3, xmm4 ; mm3 += mm5
-
- ; thresholding
- movdqa xmm6, xmm1 ; mm6 = p0..p3
- psubusw xmm6, xmm4 ; mm6 = p0..p3 - p1..p4
- psubusw xmm4, xmm1 ; mm5 = p1..p4 - p0..p3
- paddusw xmm6, xmm4 ; mm6 = abs(p0..p3 - p1..p4)
- pcmpgtw xmm6, xmm2
- por xmm7, xmm6 ; accumulate thresholds
-
- paddusw xmm3, RD42 ; mm3 += round value
- psraw xmm3, 3 ; mm3 /= 8
-
- pand xmm1, xmm7 ; mm1 select vals > thresh from source
- pandn xmm7, xmm3 ; mm7 select vals < thresh from blurred result
- paddusw xmm1, xmm7 ; combination
-
- packuswb xmm1, xmm0 ; pack to bytes
- movq QWORD PTR [rdi+rdx-8], mm0 ; store previous four bytes
- movdq2q mm0, xmm1
-
- add rdx, 8
- cmp edx, dword arg(5) ;cols
- jl .acrossnextcol;
-
- ; last 8 pixels
- movq QWORD PTR [rdi+rdx-8], mm0
-
- ; done with this rwo
- add rsi,rax ; next line
- mov eax, dword arg(3) ;dst_pixels_per_line ; destination pitch?
- add rdi,rax ; next destination
- mov eax, dword arg(2) ;src_pixels_per_line ; destination pitch?
-
- dec rcx ; decrement count
- jnz .nextrow ; next row
-
-%if ABI_IS_32BIT=1 && CONFIG_PIC=1
- add rsp,16
- pop rsp
-%endif
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-%undef RD42
-
-
-;void vp10_mbpost_proc_down_xmm(unsigned char *dst,
-; int pitch, int rows, int cols,int flimit)
-extern sym(vp10_rv)
-global sym(vp10_mbpost_proc_down_xmm) PRIVATE
-sym(vp10_mbpost_proc_down_xmm):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 128+16
-
- ; unsigned char d[16][8] at [rsp]
- ; create flimit2 at [rsp+128]
- mov eax, dword ptr arg(4) ;flimit
- mov [rsp+128], eax
- mov [rsp+128+4], eax
- mov [rsp+128+8], eax
- mov [rsp+128+12], eax
-%define flimit4 [rsp+128]
-
-%if ABI_IS_32BIT=0
- lea r8, [GLOBAL(sym(vp10_rv))]
-%endif
-
- ;rows +=8;
- add dword arg(2), 8
-
- ;for(c=0; c<cols; c+=8)
-.loop_col:
- mov rsi, arg(0) ; s
- pxor xmm0, xmm0 ;
-
- movsxd rax, dword ptr arg(1) ;pitch ;
- neg rax ; rax = -pitch
-
- lea rsi, [rsi + rax*8]; ; rdi = s[-pitch*8]
- neg rax
-
-
- pxor xmm5, xmm5
- pxor xmm6, xmm6 ;
-
- pxor xmm7, xmm7 ;
- mov rdi, rsi
-
- mov rcx, 15 ;
-
-.loop_initvar:
- movq xmm1, QWORD PTR [rdi];
- punpcklbw xmm1, xmm0 ;
-
- paddw xmm5, xmm1 ;
- pmullw xmm1, xmm1 ;
-
- movdqa xmm2, xmm1 ;
- punpcklwd xmm1, xmm0 ;
-
- punpckhwd xmm2, xmm0 ;
- paddd xmm6, xmm1 ;
-
- paddd xmm7, xmm2 ;
- lea rdi, [rdi+rax] ;
-
- dec rcx
- jne .loop_initvar
- ;save the var and sum
- xor rdx, rdx
-.loop_row:
- movq xmm1, QWORD PTR [rsi] ; [s-pitch*8]
- movq xmm2, QWORD PTR [rdi] ; [s+pitch*7]
-
- punpcklbw xmm1, xmm0
- punpcklbw xmm2, xmm0
-
- paddw xmm5, xmm2
- psubw xmm5, xmm1
-
- pmullw xmm2, xmm2
- movdqa xmm4, xmm2
-
- punpcklwd xmm2, xmm0
- punpckhwd xmm4, xmm0
-
- paddd xmm6, xmm2
- paddd xmm7, xmm4
-
- pmullw xmm1, xmm1
- movdqa xmm2, xmm1
-
- punpcklwd xmm1, xmm0
- psubd xmm6, xmm1
-
- punpckhwd xmm2, xmm0
- psubd xmm7, xmm2
-
-
- movdqa xmm3, xmm6
- pslld xmm3, 4
-
- psubd xmm3, xmm6
- movdqa xmm1, xmm5
-
- movdqa xmm4, xmm5
- pmullw xmm1, xmm1
-
- pmulhw xmm4, xmm4
- movdqa xmm2, xmm1
-
- punpcklwd xmm1, xmm4
- punpckhwd xmm2, xmm4
-
- movdqa xmm4, xmm7
- pslld xmm4, 4
-
- psubd xmm4, xmm7
-
- psubd xmm3, xmm1
- psubd xmm4, xmm2
-
- psubd xmm3, flimit4
- psubd xmm4, flimit4
-
- psrad xmm3, 31
- psrad xmm4, 31
-
- packssdw xmm3, xmm4
- packsswb xmm3, xmm0
-
- movq xmm1, QWORD PTR [rsi+rax*8]
-
- movq xmm2, xmm1
- punpcklbw xmm1, xmm0
-
- paddw xmm1, xmm5
- mov rcx, rdx
-
- and rcx, 127
-%if ABI_IS_32BIT=1 && CONFIG_PIC=1
- push rax
- lea rax, [GLOBAL(sym(vp10_rv))]
- movdqu xmm4, [rax + rcx*2] ;vp10_rv[rcx*2]
- pop rax
-%elif ABI_IS_32BIT=0
- movdqu xmm4, [r8 + rcx*2] ;vp10_rv[rcx*2]
-%else
- movdqu xmm4, [sym(vp10_rv) + rcx*2]
-%endif
-
- paddw xmm1, xmm4
- ;paddw xmm1, eight8s
- psraw xmm1, 4
-
- packuswb xmm1, xmm0
- pand xmm1, xmm3
-
- pandn xmm3, xmm2
- por xmm1, xmm3
-
- and rcx, 15
- movq QWORD PTR [rsp + rcx*8], xmm1 ;d[rcx*8]
-
- mov rcx, rdx
- sub rcx, 8
-
- and rcx, 15
- movq mm0, [rsp + rcx*8] ;d[rcx*8]
-
- movq [rsi], mm0
- lea rsi, [rsi+rax]
-
- lea rdi, [rdi+rax]
- add rdx, 1
-
- cmp edx, dword arg(2) ;rows
- jl .loop_row
-
- add dword arg(0), 8 ; s += 8
- sub dword arg(3), 8 ; cols -= 8
- cmp dword arg(3), 0
- jg .loop_col
-
- add rsp, 128+16
- pop rsp
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-%undef flimit4
-
-
-;void vp10_mbpost_proc_across_ip_xmm(unsigned char *src,
-; int pitch, int rows, int cols,int flimit)
-global sym(vp10_mbpost_proc_across_ip_xmm) PRIVATE
-sym(vp10_mbpost_proc_across_ip_xmm):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16
-
- ; create flimit4 at [rsp]
- mov eax, dword ptr arg(4) ;flimit
- mov [rsp], eax
- mov [rsp+4], eax
- mov [rsp+8], eax
- mov [rsp+12], eax
-%define flimit4 [rsp]
-
-
- ;for(r=0;r<rows;r++)
-.ip_row_loop:
-
- xor rdx, rdx ;sumsq=0;
- xor rcx, rcx ;sum=0;
- mov rsi, arg(0); s
- mov rdi, -8
-.ip_var_loop:
- ;for(i=-8;i<=6;i++)
- ;{
- ; sumsq += s[i]*s[i];
- ; sum += s[i];
- ;}
- movzx eax, byte [rsi+rdi]
- add ecx, eax
- mul al
- add edx, eax
- add rdi, 1
- cmp rdi, 6
- jle .ip_var_loop
-
-
- ;mov rax, sumsq
- ;movd xmm7, rax
- movd xmm7, edx
-
- ;mov rax, sum
- ;movd xmm6, rax
- movd xmm6, ecx
-
- mov rsi, arg(0) ;s
- xor rcx, rcx
-
- movsxd rdx, dword arg(3) ;cols
- add rdx, 8
- pxor mm0, mm0
- pxor mm1, mm1
-
- pxor xmm0, xmm0
-.nextcol4:
-
- movd xmm1, DWORD PTR [rsi+rcx-8] ; -8 -7 -6 -5
- movd xmm2, DWORD PTR [rsi+rcx+7] ; +7 +8 +9 +10
-
- punpcklbw xmm1, xmm0 ; expanding
- punpcklbw xmm2, xmm0 ; expanding
-
- punpcklwd xmm1, xmm0 ; expanding to dwords
- punpcklwd xmm2, xmm0 ; expanding to dwords
-
- psubd xmm2, xmm1 ; 7--8 8--7 9--6 10--5
- paddd xmm1, xmm1 ; -8*2 -7*2 -6*2 -5*2
-
- paddd xmm1, xmm2 ; 7+-8 8+-7 9+-6 10+-5
- pmaddwd xmm1, xmm2 ; squared of 7+-8 8+-7 9+-6 10+-5
-
- paddd xmm6, xmm2
- paddd xmm7, xmm1
-
- pshufd xmm6, xmm6, 0 ; duplicate the last ones
- pshufd xmm7, xmm7, 0 ; duplicate the last ones
-
- psrldq xmm1, 4 ; 8--7 9--6 10--5 0000
- psrldq xmm2, 4 ; 8--7 9--6 10--5 0000
-
- pshufd xmm3, xmm1, 3 ; 0000 8--7 8--7 8--7 squared
- pshufd xmm4, xmm2, 3 ; 0000 8--7 8--7 8--7 squared
-
- paddd xmm6, xmm4
- paddd xmm7, xmm3
-
- pshufd xmm3, xmm1, 01011111b ; 0000 0000 9--6 9--6 squared
- pshufd xmm4, xmm2, 01011111b ; 0000 0000 9--6 9--6 squared
-
- paddd xmm7, xmm3
- paddd xmm6, xmm4
-
- pshufd xmm3, xmm1, 10111111b ; 0000 0000 8--7 8--7 squared
- pshufd xmm4, xmm2, 10111111b ; 0000 0000 8--7 8--7 squared
-
- paddd xmm7, xmm3
- paddd xmm6, xmm4
-
- movdqa xmm3, xmm6
- pmaddwd xmm3, xmm3
-
- movdqa xmm5, xmm7
- pslld xmm5, 4
-
- psubd xmm5, xmm7
- psubd xmm5, xmm3
-
- psubd xmm5, flimit4
- psrad xmm5, 31
-
- packssdw xmm5, xmm0
- packsswb xmm5, xmm0
-
- movd xmm1, DWORD PTR [rsi+rcx]
- movq xmm2, xmm1
-
- punpcklbw xmm1, xmm0
- punpcklwd xmm1, xmm0
-
- paddd xmm1, xmm6
- paddd xmm1, [GLOBAL(four8s)]
-
- psrad xmm1, 4
- packssdw xmm1, xmm0
-
- packuswb xmm1, xmm0
- pand xmm1, xmm5
-
- pandn xmm5, xmm2
- por xmm5, xmm1
-
- movd [rsi+rcx-8], mm0
- movq mm0, mm1
-
- movdq2q mm1, xmm5
- psrldq xmm7, 12
-
- psrldq xmm6, 12
- add rcx, 4
-
- cmp rcx, rdx
- jl .nextcol4
-
- ;s+=pitch;
- movsxd rax, dword arg(1)
- add arg(0), rax
-
- sub dword arg(2), 1 ;rows-=1
- cmp dword arg(2), 0
- jg .ip_row_loop
-
- add rsp, 16
- pop rsp
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-%undef flimit4
-
-
-;void vp10_plane_add_noise_wmt (unsigned char *start, unsigned char *noise,
-; unsigned char blackclamp[16],
-; unsigned char whiteclamp[16],
-; unsigned char bothclamp[16],
-; unsigned int width, unsigned int height, int pitch)
-global sym(vp10_plane_add_noise_wmt) PRIVATE
-sym(vp10_plane_add_noise_wmt):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 8
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
-.addnoise_loop:
- call sym(LIBVPX_RAND) WRT_PLT
- mov rcx, arg(1) ;noise
- and rax, 0xff
- add rcx, rax
-
- ; we rely on the fact that the clamping vectors are stored contiguously
- ; in black/white/both order. Note that we have to reload this here because
- ; rdx could be trashed by rand()
- mov rdx, arg(2) ; blackclamp
-
-
- mov rdi, rcx
- movsxd rcx, dword arg(5) ;[Width]
- mov rsi, arg(0) ;Pos
- xor rax,rax
-
-.addnoise_nextset:
- movdqu xmm1,[rsi+rax] ; get the source
-
- psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
- paddusb xmm1, [rdx+32] ;bothclamp
- psubusb xmm1, [rdx+16] ;whiteclamp
-
- movdqu xmm2,[rdi+rax] ; get the noise for this line
- paddb xmm1,xmm2 ; add it in
- movdqu [rsi+rax],xmm1 ; store the result
-
- add rax,16 ; move to the next line
-
- cmp rax, rcx
- jl .addnoise_nextset
-
- movsxd rax, dword arg(7) ; Pitch
- add arg(0), rax ; Start += Pitch
- sub dword arg(6), 1 ; Height -= 1
- jg .addnoise_loop
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-SECTION_RODATA
-align 16
-rd42:
- times 8 dw 0x04
-four8s:
- times 4 dd 8
#include "vp10/common/alloccommon.h"
#include "vp10/common/loopfilter.h"
#include "vp10/common/onyxc_int.h"
-#if CONFIG_VP9_POSTPROC
-#include "vp10/common/postproc.h"
-#endif
#include "vp10/common/quant_common.h"
#include "vp10/common/reconintra.h"
}
vpx_codec_err_t vp10_copy_reference_dec(VP10Decoder *pbi,
- VP9_REFFRAME ref_frame_flag,
- YV12_BUFFER_CONFIG *sd) {
+ VP9_REFFRAME ref_frame_flag,
+ YV12_BUFFER_CONFIG *sd) {
VP10_COMMON *cm = &pbi->common;
/* TODO(jkoleszar): The decoder doesn't have any real knowledge of what the
return retcode;
}
-int vp10_get_raw_frame(VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd,
- vp10_ppflags_t *flags) {
+int vp10_get_raw_frame(VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd) {
VP10_COMMON *const cm = &pbi->common;
int ret = -1;
-#if !CONFIG_VP9_POSTPROC
- (void)*flags;
-#endif
if (pbi->ready_for_new_data == 1)
return ret;
pbi->ready_for_new_data = 1;
-#if CONFIG_VP9_POSTPROC
- if (!cm->show_existing_frame) {
- ret = vp10_post_proc_frame(cm, sd, flags);
- } else {
- *sd = *cm->frame_to_show;
- ret = 0;
- }
-#else
*sd = *cm->frame_to_show;
ret = 0;
-#endif /*!CONFIG_POSTPROC*/
vpx_clear_system_state();
return ret;
}
#include "vp10/common/thread_common.h"
#include "vp10/common/onyxc_int.h"
-#include "vp10/common/ppflags.h"
#include "vp10/decoder/dthread.h"
#ifdef __cplusplus
int vp10_receive_compressed_data(struct VP10Decoder *pbi,
size_t size, const uint8_t **dest);
-int vp10_get_raw_frame(struct VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd,
- vp10_ppflags_t *flags);
+int vp10_get_raw_frame(struct VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd);
vpx_codec_err_t vp10_copy_reference_dec(struct VP10Decoder *pbi,
VP9_REFFRAME ref_frame_flag,
#include "vp10/common/alloccommon.h"
#include "vp10/common/filter.h"
#include "vp10/common/idct.h"
-#if CONFIG_VP9_POSTPROC
-#include "vp10/common/postproc.h"
-#endif
#include "vp10/common/reconinter.h"
#include "vp10/common/reconintra.h"
#include "vp10/common/tile_common.h"
cpi->active_map.map = NULL;
vp10_free_ref_frame_buffers(cm->buffer_pool);
-#if CONFIG_VP9_POSTPROC
- vp10_free_postproc_buffers(cm);
-#endif
vp10_free_context_buffers(cm);
vpx_free_frame_buffer(&cpi->last_frame_uf);
vp10_remove_common(cm);
vp10_free_ref_frame_buffers(cm->buffer_pool);
-#if CONFIG_VP9_POSTPROC
- vp10_free_postproc_buffers(cm);
-#endif
vpx_free(cpi);
#if CONFIG_VP9_TEMPORAL_DENOISING
// lagged coding, and if the relevant speed feature flag is set.
if (oxcf->pass == 2 && cpi->sf.static_segmentation)
configure_static_seg_features(cpi);
-
-#if CONFIG_VP9_POSTPROC
- if (oxcf->noise_sensitivity > 0) {
- int l = 0;
- switch (oxcf->noise_sensitivity) {
- case 1:
- l = 20;
- break;
- case 2:
- l = 40;
- break;
- case 3:
- l = 60;
- break;
- case 4:
- case 5:
- l = 100;
- break;
- case 6:
- l = 150;
- break;
- }
- vp10_denoise(cpi->Source, cpi->Source, l);
- }
-#endif // CONFIG_VP9_POSTPROC
}
static void init_motion_estimation(VP10_COMP *cpi) {
{
PSNR_STATS psnr2;
double frame_ssim2 = 0, weight = 0;
-#if CONFIG_VP9_POSTPROC
- if (vpx_alloc_frame_buffer(&cm->post_proc_buffer,
- recon->y_crop_width, recon->y_crop_height,
- cm->subsampling_x, cm->subsampling_y,
-#if CONFIG_VPX_HIGHBITDEPTH
- cm->use_highbitdepth,
-#endif
- VPX_ENC_BORDER_IN_PIXELS,
- cm->byte_alignment) < 0) {
- vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
- "Failed to allocate post processing buffer");
- }
-
- vp10_deblock(cm->frame_to_show, &cm->post_proc_buffer,
- cm->lf.filter_level * 10 / 6);
-#endif
vpx_clear_system_state();
#if CONFIG_VPX_HIGHBITDEPTH
return 0;
}
-int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest,
- vp10_ppflags_t *flags) {
+int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest) {
VP10_COMMON *cm = &cpi->common;
-#if !CONFIG_VP9_POSTPROC
- (void)flags;
-#endif
if (!cm->show_frame) {
return -1;
} else {
int ret;
-#if CONFIG_VP9_POSTPROC
- ret = vp10_post_proc_frame(cm, dest, flags);
-#else
if (cm->frame_to_show) {
*dest = *cm->frame_to_show;
dest->y_width = cm->width;
} else {
ret = -1;
}
-#endif // !CONFIG_VP9_POSTPROC
vpx_clear_system_state();
return ret;
}
#include "vpx/vp8cx.h"
#include "vp10/common/alloccommon.h"
-#include "vp10/common/ppflags.h"
#include "vp10/common/entropymode.h"
#include "vp10/common/thread_common.h"
#include "vp10/common/onyxc_int.h"
size_t *size, uint8_t *dest,
int64_t *time_stamp, int64_t *time_end, int flush);
-int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest,
- vp10_ppflags_t *flags);
+int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest);
int vp10_use_as_reference(VP10_COMP *cpi, int ref_frame_flags);
for (mb_row = 0; mb_row < mb_rows; mb_row++) {
// Source frames are extended to 16 pixels. This is different than
- // L/A/G reference frames that have a border of 32 (VP9ENCBORDERINPIXELS)
+ // L/A/G reference frames that have a border of 32 (VPX_ENC_BORDER_IN_PIXELS)
// A 6/8 tap filter is used for motion search. This requires 2 pixels
// before and 3 pixels after. So the largest Y mv on a border would
// then be 16 - VPX_INTERP_EXTEND. The UV blocks are half the size of the
VP10_COMMON_SRCS-yes += vp10_common.mk
VP10_COMMON_SRCS-yes += vp10_iface_common.h
-VP10_COMMON_SRCS-yes += common/ppflags.h
VP10_COMMON_SRCS-yes += common/alloccommon.c
VP10_COMMON_SRCS-yes += common/blockd.c
VP10_COMMON_SRCS-yes += common/debugmodes.c
VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm.h
VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm.c
-VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.h
-VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.c
-VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/mfqe.h
-VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/mfqe.c
-ifeq ($(CONFIG_VP9_POSTPROC),yes)
-VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/mfqe_sse2.asm
-VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm
-endif
-
ifneq ($(CONFIG_VPX_HIGHBITDEPTH),yes)
VP10_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/itrans4_dspr2.c
VP10_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/itrans8_dspr2.c
VP10_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/idct8x8_msa.c
VP10_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/idct16x16_msa.c
-ifeq ($(CONFIG_VP9_POSTPROC),yes)
-VP10_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/mfqe_msa.c
-endif
-
VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_intrin_sse2.c
VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_fwd_txfm_sse2.c
VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_fwd_dct32x32_impl_sse2.h
static vpx_codec_err_t ctrl_set_previewpp(vpx_codec_alg_priv_t *ctx,
va_list args) {
-#if CONFIG_VP9_POSTPROC
- vp8_postproc_cfg_t *config = va_arg(args, vp8_postproc_cfg_t *);
- if (config != NULL) {
- ctx->preview_ppcfg = *config;
- return VPX_CODEC_OK;
- } else {
- return VPX_CODEC_INVALID_PARAM;
- }
-#else
(void)ctx;
(void)args;
return VPX_CODEC_INCAPABLE;
-#endif
}
static vpx_image_t *encoder_get_preview(vpx_codec_alg_priv_t *ctx) {
YV12_BUFFER_CONFIG sd;
- vp10_ppflags_t flags;
- vp10_zero(flags);
-
- if (ctx->preview_ppcfg.post_proc_flag) {
- flags.post_proc_flag = ctx->preview_ppcfg.post_proc_flag;
- flags.deblocking_level = ctx->preview_ppcfg.deblocking_level;
- flags.noise_level = ctx->preview_ppcfg.noise_level;
- }
- if (vp10_get_preview_raw_frame(ctx->cpi, &sd, &flags) == 0) {
+ if (vp10_get_preview_raw_frame(ctx->cpi, &sd) == 0) {
yuvconfig2image(&ctx->preview_img, &sd, NULL);
return &ctx->preview_img;
} else {
#include "vp10/vp10_iface_common.h"
-#define VP9_CAP_POSTPROC (CONFIG_VP9_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0)
+#define VP9_CAP_POSTPROC 0
typedef vpx_codec_stream_info_t vp10_stream_info_t;
(FrameWorkerData *)worker->data1;
vpx_get_worker_interface()->end(worker);
vp10_remove_common(&frame_worker_data->pbi->common);
-#if CONFIG_VP9_POSTPROC
- vp10_free_postproc_buffers(&frame_worker_data->pbi->common);
-#endif
vp10_decoder_remove(frame_worker_data->pbi);
vpx_free(frame_worker_data->scratch_buffer);
#if CONFIG_MULTITHREAD
cfg->noise_level = 0;
}
-static void set_ppflags(const vpx_codec_alg_priv_t *ctx,
- vp10_ppflags_t *flags) {
- flags->post_proc_flag =
- ctx->postproc_cfg.post_proc_flag;
-
- flags->deblocking_level = ctx->postproc_cfg.deblocking_level;
- flags->noise_level = ctx->postproc_cfg.noise_level;
-}
-
static int frame_worker_hook(void *arg1, void *arg2) {
FrameWorkerData *const frame_worker_data = (FrameWorkerData *)arg1;
const uint8_t *data = frame_worker_data->data;
static void wait_worker_and_cache_frame(vpx_codec_alg_priv_t *ctx) {
YV12_BUFFER_CONFIG sd;
- vp10_ppflags_t flags = {0, 0, 0};
const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
VPxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id];
FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
check_resync(ctx, frame_worker_data->pbi);
- if (vp10_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) {
+ if (vp10_get_raw_frame(frame_worker_data->pbi, &sd) == 0) {
VP10_COMMON *const cm = &frame_worker_data->pbi->common;
RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
ctx->frame_cache[ctx->frame_cache_write].fb_idx = cm->new_fb_idx;
if (*iter == NULL && ctx->frame_workers != NULL) {
do {
YV12_BUFFER_CONFIG sd;
- vp10_ppflags_t flags = {0, 0, 0};
const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
VPxWorker *const worker =
&ctx->frame_workers[ctx->next_output_worker_id];
(FrameWorkerData *)worker->data1;
ctx->next_output_worker_id =
(ctx->next_output_worker_id + 1) % ctx->num_frame_workers;
- if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)
- set_ppflags(ctx, &flags);
// Wait for the frame from worker thread.
if (winterface->sync(worker)) {
// Check if worker has received any frames.
frame_worker_data->received_frame = 0;
check_resync(ctx, frame_worker_data->pbi);
}
- if (vp10_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) {
+ if (vp10_get_raw_frame(frame_worker_data->pbi, &sd) == 0) {
VP10_COMMON *const cm = &frame_worker_data->pbi->common;
RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
release_last_output_frame(ctx);
static vpx_codec_err_t ctrl_set_postproc(vpx_codec_alg_priv_t *ctx,
va_list args) {
-#if CONFIG_VP9_POSTPROC
- vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *);
-
- if (data) {
- ctx->postproc_cfg_set = 1;
- ctx->postproc_cfg = *((vp8_postproc_cfg_t *)data);
- return VPX_CODEC_OK;
- } else {
- return VPX_CODEC_INVALID_PARAM;
- }
-#else
(void)ctx;
(void)args;
return VPX_CODEC_INCAPABLE;
-#endif
}
static vpx_codec_err_t ctrl_set_dbg_options(vpx_codec_alg_priv_t *ctx,
VP10_CX_SRCS-yes += encoder/aq_complexity.h
VP10_CX_SRCS-yes += encoder/skin_detection.c
VP10_CX_SRCS-yes += encoder/skin_detection.h
-ifeq ($(CONFIG_VP9_POSTPROC),yes)
-VP10_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.h
-VP10_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.c
-endif
VP10_CX_SRCS-yes += encoder/temporal_filter.c
VP10_CX_SRCS-yes += encoder/temporal_filter.h
VP10_CX_SRCS-yes += encoder/mbgraph.c
; On Android platforms use lrand48 when building postproc routines. Prior to
; Android L (Lollipop), rand() was not available.
-%if CONFIG_POSTPROC=1 || CONFIG_VP9_POSTPROC=1
+%if CONFIG_POSTPROC=1
%ifdef __ANDROID__
extern sym(lrand48)
%define LIBVPX_RAND lrand48