Incorporate vp9-preview changes by merging master branch into experimental.
Conflicts:
test/test.mk
vp9/common/vp9_filter.c
vp9/common/vp9_idctllm.c
vp9/common/vp9_invtrans.h
vp9/common/vp9_mbpitch.c
vp9/common/vp9_rtcd_defs.sh
vp9/common/vp9_systemdependent.h
vp9/common/vp9_type_aliases.h
vp9/common/x86/vp9_asm_stubs.c
vp9/common/x86/vp9_subpixel_mmx.asm
vp9/decoder/vp9_decodframe.c
vp9/decoder/vp9_dequantize.c
vp9/decoder/vp9_dequantize.h
vp9/decoder/vp9_onyxd_int.h
vp9/encoder/vp9_bitstream.c
vp9/encoder/vp9_encodeframe.c
vp9/encoder/vp9_rdopt.c
Change-Id: I17f51c3666d1b59cf1a699f87607cbc5d30a87c5
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
#LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc
- LIBVPX_TEST_SRCS-yes += idct8x8_test.cc
- LIBVPX_TEST_SRCS-yes += variance_test.cc
+ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
+ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_TX32X32),yesyes)
+LIBVPX_TEST_SRCS-yes += dct32x32_test.cc
+endif
endif # VP9
**************************************************************************/
#include <assert.h>
#include <math.h>
- #include "vpx_ports/config.h"
+ #include "./vpx_config.h"
#include "vp9/common/vp9_systemdependent.h"
-
#include "vp9/common/vp9_blockd.h"
+#include "vp9/common/vp9_common.h"
static const int cospi8sqrt2minus1 = 20091;
static const int sinpi8sqrt2 = 35468;
3936, -3526, 3084, -2614, 2120, -1607, 1080, -542
};
- void vp9_ihtllm_float_c(const int16_t *input, int16_t *output, int pitch,
- TX_TYPE tx_type, int tx_dim) {
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- {
- int i, j, k;
- float bufa[256], bufb[256]; // buffers are for floating-point test purpose
- // the implementation could be simplified in
- // conjunction with integer transform
- const int16_t *ip = input;
- int16_t *op = output;
- int shortpitch = pitch >> 1;
-
- float *pfa = &bufa[0];
- float *pfb = &bufb[0];
-
- // pointers to vertical and horizontal transforms
- const float *ptv, *pth;
-
- assert(tx_type != DCT_DCT);
- // load and convert residual array into floating-point
- for(j = 0; j < tx_dim; j++) {
- for(i = 0; i < tx_dim; i++) {
- pfa[i] = (float)ip[i];
- }
- pfa += tx_dim;
- ip += tx_dim;
- }
-
- // vertical transformation
- pfa = &bufa[0];
- pfb = &bufb[0];
-
- switch(tx_type) {
- case ADST_ADST :
- case ADST_DCT :
- ptv = (tx_dim == 4) ? &iadst_4[0] :
- ((tx_dim == 8) ? &iadst_8[0] : &iadst_16[0]);
- break;
-
- default :
- ptv = (tx_dim == 4) ? &idct_4[0] :
- ((tx_dim == 8) ? &idct_8[0] : &idct_16[0]);
- break;
- }
-
- for(j = 0; j < tx_dim; j++) {
- for(i = 0; i < tx_dim; i++) {
- pfb[i] = 0 ;
- for(k = 0; k < tx_dim; k++) {
- pfb[i] += ptv[k] * pfa[(k * tx_dim)];
- }
- pfa += 1;
- }
-
- pfb += tx_dim;
- ptv += tx_dim;
- pfa = &bufa[0];
- }
-
- // horizontal transformation
- pfa = &bufa[0];
- pfb = &bufb[0];
-
- switch(tx_type) {
- case ADST_ADST :
- case DCT_ADST :
- pth = (tx_dim == 4) ? &iadst_4[0] :
- ((tx_dim == 8) ? &iadst_8[0] : &iadst_16[0]);
- break;
-
- default :
- pth = (tx_dim == 4) ? &idct_4[0] :
- ((tx_dim == 8) ? &idct_8[0] : &idct_16[0]);
- break;
- }
-
- for(j = 0; j < tx_dim; j++) {
- for(i = 0; i < tx_dim; i++) {
- pfa[i] = 0;
- for(k = 0; k < tx_dim; k++) {
- pfa[i] += pfb[k] * pth[k];
- }
- pth += tx_dim;
- }
-
- pfa += tx_dim;
- pfb += tx_dim;
-
- switch(tx_type) {
- case ADST_ADST :
- case DCT_ADST :
- pth = (tx_dim == 4) ? &iadst_4[0] :
- ((tx_dim == 8) ? &iadst_8[0] : &iadst_16[0]);
- break;
-
- default :
- pth = (tx_dim == 4) ? &idct_4[0] :
- ((tx_dim == 8) ? &idct_8[0] : &idct_16[0]);
- break;
- }
- }
-
- // convert to short integer format and load BLOCKD buffer
- op = output;
- pfa = &bufa[0];
-
- for(j = 0; j < tx_dim; j++) {
- for(i = 0; i < tx_dim; i++) {
- op[i] = (pfa[i] > 0 ) ? (int16_t)( pfa[i] / 8 + 0.49) :
- -(int16_t)( - pfa[i] / 8 + 0.49);
- }
-
- op += shortpitch;
- pfa += tx_dim;
- }
- }
- vp9_clear_system_state(); // Make it simd safe : __asm emms;
- }
/* Converted the transforms to integer form. */
-#define VERTICAL_SHIFT 14 // 16
-#define VERTICAL_ROUNDING ((1 << (VERTICAL_SHIFT - 1)) - 1)
-#define HORIZONTAL_SHIFT 17 // 15
+#define HORIZONTAL_SHIFT 14 // 16
#define HORIZONTAL_ROUNDING ((1 << (HORIZONTAL_SHIFT - 1)) - 1)
+#define VERTICAL_SHIFT 17 // 15
+#define VERTICAL_ROUNDING ((1 << (VERTICAL_SHIFT - 1)) - 1)
void vp9_ihtllm_c(const int16_t *input, int16_t *output, int pitch,
- TX_TYPE tx_type, int tx_dim) {
+ TX_TYPE tx_type, int tx_dim, uint16_t eobs) {
int i, j, k;
+ int nz_dim;
int16_t imbuf[256];
const int16_t *ip = input;
break;
}
- /* vertical transformation */
+ nz_dim = tx_dim;
+ if(tx_dim > 4) {
+ if(eobs < 36) {
+ vpx_memset(im, 0, 512);
+ nz_dim = 8;
+ if(eobs < 3) {
+ nz_dim = 2;
+ } else if(eobs < 10) {
+ nz_dim = 4;
+ }
+ }
+ }
+
+ /* 2-D inverse transform X = M1*Z*Transposed_M2 is calculated in 2 steps
+ * from right to left:
+ * 1. horizontal transform: Y= Z*Transposed_M2
+ * 2. vertical transform: X = M1*Y
+ * In SIMD, doing this way could eliminate the transpose needed if it is
+ * calculated from left to right.
+ */
+ /* Horizontal transformation */
for (j = 0; j < tx_dim; j++) {
- for (i = 0; i < tx_dim; i++) {
+ for (i = 0; i < nz_dim; i++) {
int temp = 0;
- for (k = 0; k < tx_dim; k++) {
+ for (k = 0; k < nz_dim; k++) {
- temp += ptv[k] * ip[(k * tx_dim)];
+ temp += ip[k] * pth[k];
}
- im[i] = (int16_t)((temp + VERTICAL_ROUNDING) >> VERTICAL_SHIFT);
- ip++;
+ /* Calculate im and store it in its transposed position. */
+ im[i] = (int16_t)((temp + HORIZONTAL_ROUNDING) >> HORIZONTAL_SHIFT);
+ ip += tx_dim;
}
- im += tx_dim; // 16
- ptv += tx_dim;
+ im += tx_dim;
+ pth += tx_dim;
ip = input;
}
- /* horizontal transformation */
+ /* Vertical transformation */
im = &imbuf[0];
- for (j = 0; j < tx_dim; j++) {
- const int16_t *pthc = pth;
-
- for (i = 0; i < tx_dim; i++) {
+ for (i = 0; i < tx_dim; i++) {
+ for (j = 0; j < tx_dim; j++) {
int temp = 0;
- for (k = 0; k < tx_dim; k++) {
+ for (k = 0; k < nz_dim; k++) {
- temp += im[k] * pthc[k];
+ temp += ptv[k] * im[k];
}
- op[i] = (int16_t)((temp + HORIZONTAL_ROUNDING) >> HORIZONTAL_SHIFT);
- pthc += tx_dim;
+ op[j] = (int16_t)((temp + VERTICAL_ROUNDING) >> VERTICAL_SHIFT);
+ im += tx_dim;
}
-
- im += tx_dim; // 16
+ im = &imbuf[0];
+ ptv += tx_dim;
op += shortpitch;
}
}
#ifndef VP9_COMMON_VP9_INVTRANS_H_
#define VP9_COMMON_VP9_INVTRANS_H_
- #include "vpx_ports/config.h"
+ #include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_blockd.h"
extern void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch);
static void setup_macroblock(MACROBLOCKD *xd, BLOCKSET bs) {
int block;
- unsigned char **y, **u, **v;
- unsigned char **y2 = NULL, **u2 = NULL, **v2 = NULL;
+ uint8_t **y, **u, **v;
- uint8_t **y2, **u2, **v2;
++ uint8_t **y2 = NULL, **u2 = NULL, **v2 = NULL;
BLOCKD *blockd = xd->block;
int stride;
prototype void vp9_dequantize_b_2x2 "struct blockd *x"
specialize vp9_dequantize_b_2x2
-prototype void vp9_dequant_dc_idct_add_y_block_8x8 "short *q, const short *dq, unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs, const short *dc, struct macroblockd *xd"
+prototype void vp9_dequant_dc_idct_add_y_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs, const int16_t *dc, struct macroblockd *xd"
specialize vp9_dequant_dc_idct_add_y_block_8x8
-prototype void vp9_dequant_idct_add_y_block_8x8 "short *q, const short *dq, unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs, struct macroblockd *xd"
+prototype void vp9_dequant_idct_add_y_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs, struct macroblockd *xd"
specialize vp9_dequant_idct_add_y_block_8x8
-prototype void vp9_dequant_idct_add_uv_block_8x8 "short *q, const short *dq, unsigned char *pre, unsigned char *dstu, unsigned char *dstv, int stride, unsigned short *eobs, struct macroblockd *xd"
+prototype void vp9_dequant_idct_add_uv_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, int stride, uint16_t *eobs, struct macroblockd *xd"
specialize vp9_dequant_idct_add_uv_block_8x8
-prototype void vp9_dequant_idct_add_16x16 "short *input, const short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, unsigned short eobs"
+prototype void vp9_dequant_idct_add_16x16 "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob"
specialize vp9_dequant_idct_add_16x16
-prototype void vp9_dequant_idct_add_8x8 "short *input, const short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int dc, unsigned short eobs"
+prototype void vp9_dequant_idct_add_8x8 "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int dc, int eob"
specialize vp9_dequant_idct_add_8x8
-prototype void vp9_dequant_idct_add "short *input, const short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride"
+prototype void vp9_dequant_idct_add "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride"
specialize vp9_dequant_idct_add
-prototype void vp9_dequant_dc_idct_add "short *input, const short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc"
+prototype void vp9_dequant_dc_idct_add "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int dc"
specialize vp9_dequant_dc_idct_add
-prototype void vp9_dequant_dc_idct_add_y_block "short *q, const short *dq, unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs, const short *dc"
+prototype void vp9_dequant_dc_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs, const int16_t *dcs"
- specialize vp9_dequant_dc_idct_add_y_block mmx
+ specialize vp9_dequant_dc_idct_add_y_block
-prototype void vp9_dequant_idct_add_y_block "short *q, const short *dq, unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs"
+prototype void vp9_dequant_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs"
- specialize vp9_dequant_idct_add_y_block mmx
+ specialize vp9_dequant_idct_add_y_block
-prototype void vp9_dequant_idct_add_uv_block "short *q, const short *dq, unsigned char *pre, unsigned char *dstu, unsigned char *dstv, int stride, unsigned short *eobs"
+prototype void vp9_dequant_idct_add_uv_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, int stride, uint16_t *eobs"
- specialize vp9_dequant_idct_add_uv_block mmx
+ specialize vp9_dequant_idct_add_uv_block
#
# RECON
#
# post proc
#
-prototype void vp9_mbpost_proc_down "unsigned char *dst, int pitch, int rows, int cols, int flimit"
+ if [ "$CONFIG_POSTPROC" = "yes" ]; then
+prototype void vp9_mbpost_proc_down "uint8_t *dst, int pitch, int rows, int cols, int flimit"
specialize vp9_mbpost_proc_down mmx sse2
vp9_mbpost_proc_down_sse2=vp9_mbpost_proc_down_xmm
specialize vp9_post_proc_down_and_across mmx sse2
vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm
-prototype void vp9_plane_add_noise "unsigned char *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch"
+prototype void vp9_plane_add_noise "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch"
specialize vp9_plane_add_noise mmx sse2
vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt
+ fi
-prototype void vp9_blend_mb_inner "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"
+prototype void vp9_blend_mb_inner "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"
specialize vp9_blend_mb_inner
-prototype void vp9_blend_mb_outer "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"
+prototype void vp9_blend_mb_outer "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"
specialize vp9_blend_mb_outer
-prototype void vp9_blend_b "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride"
+prototype void vp9_blend_b "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride"
specialize vp9_blend_b
#
#
# dct
#
-prototype void vp9_short_idct4x4llm_1 "short *input, short *output, int pitch"
+prototype void vp9_short_idct4x4llm_1 "int16_t *input, int16_t *output, int pitch"
- specialize vp9_short_idct4x4llm_1 mmx
+ specialize vp9_short_idct4x4llm_1
-prototype void vp9_short_idct4x4llm "short *input, short *output, int pitch"
+prototype void vp9_short_idct4x4llm "int16_t *input, int16_t *output, int pitch"
- specialize vp9_short_idct4x4llm mmx
+ specialize vp9_short_idct4x4llm
-prototype void vp9_short_idct8x8 "short *input, short *output, int pitch"
+prototype void vp9_short_idct8x8 "int16_t *input, int16_t *output, int pitch"
specialize vp9_short_idct8x8
-prototype void vp9_short_idct10_8x8 "short *input, short *output, int pitch"
+prototype void vp9_short_idct10_8x8 "int16_t *input, int16_t *output, int pitch"
specialize vp9_short_idct10_8x8
-prototype void vp9_short_ihaar2x2 "short *input, short *output, int pitch"
+prototype void vp9_short_ihaar2x2 "int16_t *input, int16_t *output, int pitch"
specialize vp9_short_ihaar2x2
-prototype void vp9_short_idct16x16 "short *input, short *output, int pitch"
+prototype void vp9_short_idct16x16 "int16_t *input, int16_t *output, int pitch"
specialize vp9_short_idct16x16
-prototype void vp9_short_idct10_16x16 "short *input, short *output, int pitch"
+prototype void vp9_short_idct10_16x16 "int16_t *input, int16_t *output, int pitch"
specialize vp9_short_idct10_16x16
-prototype void vp9_ihtllm "const short *input, short *output, int pitch, int tx_type, int tx_dim, short eobs"
+prototype void vp9_short_idct32x32 "int16_t *input, int16_t *output, int pitch"
+specialize vp9_short_idct32x32
+
- prototype void vp9_ihtllm "const int16_t *input, int16_t *output, int pitch, int tx_type, int tx_dim"
++prototype void vp9_ihtllm "const int16_t *input, int16_t *output, int pitch, int tx_type, int tx_dim, int16_t eobs"
specialize vp9_ihtllm
#
#ifndef VP9_COMMON_VP9_SYSTEMDEPENDENT_H_
#define VP9_COMMON_VP9_SYSTEMDEPENDENT_H_
- #include "vpx_ports/config.h"
+#ifdef _MSC_VER
+#include <math.h>
+#endif
+
+ #include "./vpx_config.h"
#if ARCH_X86 || ARCH_X86_64
void vpx_reset_mmx_state(void);
#define vp9_clear_system_state() vpx_reset_mmx_state()
#include "vpx_ports/mem.h"
#include "vp9/common/vp9_subpixel.h"
- extern const short vp9_six_tap_mmx[16][6 * 8];
+ extern const short vp9_six_tap_mmx[8][6 * 8];
-extern const short vp9_bilinear_filters_8x_mmx[8][2 * 8];
-
extern void vp9_filter_block1d_h6_mmx(unsigned char *src_ptr,
unsigned short *output_ptr,
unsigned int src_pixels_per_line,
* be found in the AUTHORS file in the root of the source tree.
*/
-
#ifndef VP9_DECODER_VP9_DBOOLHUFF_H_
#define VP9_DECODER_VP9_DBOOLHUFF_H_
+
#include <stddef.h>
#include <limits.h>
- #include "vpx_ports/config.h"
+ #include "./vpx_config.h"
#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"
#include "vp9/decoder/vp9_dequantize.h"
#include "vpx_mem/vpx_mem.h"
#include "vp9/decoder/vp9_onyxd_int.h"
-
+#include "vp9/common/vp9_common.h"
static void add_residual(const int16_t *diff, const uint8_t *pred, int pitch,
uint8_t *dest, int stride, int width, int height) {
int r, c;
#include "vp9/common/vp9_blockd.h"
#if CONFIG_LOSSLESS
- extern void vp9_dequant_idct_add_lossless_c(int16_t *input,
- const int16_t *dq,
- uint8_t *pred,
- uint8_t *output,
-extern void vp9_dequant_idct_add_lossless_c(short *input, const short *dq,
++extern void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq,
+ unsigned char *pred,
+ unsigned char *output,
int pitch, int stride);
- extern void vp9_dequant_dc_idct_add_lossless_c(int16_t *input,
- const int16_t *dq,
- uint8_t *pred,
- uint8_t *output,
-extern void vp9_dequant_dc_idct_add_lossless_c(short *input, const short *dq,
++extern void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq,
+ unsigned char *pred,
+ unsigned char *output,
int pitch, int stride, int dc);
-extern void vp9_dequant_dc_idct_add_y_block_lossless_c(short *q,
- const short *dq,
+extern void vp9_dequant_dc_idct_add_y_block_lossless_c(int16_t *q,
+ const int16_t *dq,
- uint8_t *pre,
- uint8_t *dst,
+ unsigned char *pre,
+ unsigned char *dst,
int stride,
- unsigned short *eobs,
- const short *dc);
-extern void vp9_dequant_idct_add_y_block_lossless_c(short *q, const short *dq,
+ uint16_t *eobs,
+ const int16_t *dc);
- extern void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q,
- const int16_t *dq,
- uint8_t *pre,
- uint8_t *dst,
++extern void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq,
+ unsigned char *pre,
+ unsigned char *dst,
int stride,
- unsigned short *eobs);
-extern void vp9_dequant_idct_add_uv_block_lossless_c(short *q, const short *dq,
+ uint16_t *eobs);
- extern void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q,
- const int16_t *dq,
- uint8_t *pre,
- uint8_t *dst_u,
- uint8_t *dst_v,
++extern void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq,
+ unsigned char *pre,
+ unsigned char *dst_u,
+ unsigned char *dst_v,
int stride,
- unsigned short *eobs);
+ uint16_t *eobs);
- #endif // CONFIG_LOSSLESS
+ #endif
-typedef void (*vp9_dequant_idct_add_fn_t)(short *input, const short *dq,
+typedef void (*vp9_dequant_idct_add_fn_t)(int16_t *input, const int16_t *dq,
- uint8_t *pred, uint8_t *output,
- int pitch, int stride);
+ unsigned char *pred, unsigned char *output, int pitch, int stride);
-typedef void(*vp9_dequant_dc_idct_add_fn_t)(short *input, const short *dq,
+typedef void(*vp9_dequant_dc_idct_add_fn_t)(int16_t *input, const int16_t *dq,
- uint8_t *pred, uint8_t *output,
- int pitch, int stride, int dc);
+ unsigned char *pred, unsigned char *output, int pitch, int stride, int dc);
- typedef void(*vp9_dequant_dc_idct_add_y_block_fn_t)(int16_t *q,
- const int16_t *dq,
- uint8_t *pre, uint8_t *dst,
- int stride, uint16_t *eobs,
- const int16_t *dc);
-typedef void(*vp9_dequant_dc_idct_add_y_block_fn_t)(short *q, const short *dq,
- unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs,
- const short *dc);
-typedef void(*vp9_dequant_idct_add_y_block_fn_t)(short *q, const short *dq,
- unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs);
-typedef void(*vp9_dequant_idct_add_uv_block_fn_t)(short *q, const short *dq,
++typedef void(*vp9_dequant_dc_idct_add_y_block_fn_t)(int16_t *q, const int16_t *dq,
++ unsigned char *pre, unsigned char *dst, int stride, uint16_t *eobs,
++ const int16_t *dc);
+typedef void(*vp9_dequant_idct_add_y_block_fn_t)(int16_t *q, const int16_t *dq,
- uint8_t *pre, uint8_t *dst,
- int stride, uint16_t *eobs);
++ unsigned char *pre, unsigned char *dst, int stride, uint16_t *eobs);
+typedef void(*vp9_dequant_idct_add_uv_block_fn_t)(int16_t *q, const int16_t *dq,
- uint8_t *pre, uint8_t *dst_u,
- uint8_t *dst_v, int stride,
- uint16_t *eobs);
+ unsigned char *pre, unsigned char *dst_u, unsigned char *dst_v, int stride,
- unsigned short *eobs);
++ uint16_t *eobs);
- void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input,
- const int16_t *dq,
- uint8_t *pred, uint8_t *dest,
- int pitch, int stride);
-void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, const short *dq,
++void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq,
+ unsigned char *pred, unsigned char *dest,
+ int pitch, int stride, uint16_t eobs);
-void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input,
- const short *dq, unsigned char *pred,
+void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input,
- const int16_t *dq, uint8_t *pred,
- uint8_t *dest, int pitch, int stride);
++ const int16_t *dq, unsigned char *pred,
+ unsigned char *dest, int pitch, int stride,
+ uint16_t eobs);
-void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input,
- const short *dq, unsigned char *pred,
+void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input,
- const int16_t *dq, uint8_t *pred,
- uint8_t *dest,
- int pitch, int stride);
++ const int16_t *dq, unsigned char *pred,
+ unsigned char *dest,
+ int pitch, int stride, uint16_t eobs);
#if CONFIG_SUPERBLOCKS
- void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(int16_t *q,
- const int16_t *dq,
- uint8_t *dst,
-void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(short *q, const short *dq,
++void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(int16_t *q, const int16_t *dq,
+ unsigned char *dst,
int stride,
- unsigned short *eobs,
- const short *dc,
+ uint16_t *eobs,
+ const int16_t *dc,
MACROBLOCKD *xd);
- void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q,
- const int16_t *dq,
- uint8_t *dst,
-void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(short *q, const short *dq,
++void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q, const int16_t *dq,
+ unsigned char *dst,
int stride,
- unsigned short *eobs,
- const short *dc,
+ uint16_t *eobs,
+ const int16_t *dc,
MACROBLOCKD *xd);
- void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q,
- const int16_t *dq,
- uint8_t *dstu,
- uint8_t *dstv,
-void vp9_dequant_idct_add_uv_block_8x8_inplace_c(short *q, const short *dq,
++void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q, const int16_t *dq,
+ unsigned char *dstu,
+ unsigned char *dstv,
int stride,
- unsigned short *eobs,
+ uint16_t *eobs,
MACROBLOCKD *xd);
- void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q,
- const int16_t *dq,
- uint8_t *dstu,
- uint8_t *dstv,
-void vp9_dequant_idct_add_uv_block_4x4_inplace_c(short *q, const short *dq,
++void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq,
+ unsigned char *dstu,
+ unsigned char *dstv,
int stride,
- unsigned short *eobs,
+ uint16_t *eobs,
MACROBLOCKD *xd);
- #endif // CONFIG_SUPERBLOCKS
+ #endif
- #endif // VP9_DECODER_VP9_DEQUANTIZE_H_
+ #endif
* be found in the AUTHORS file in the root of the source tree.
*/
-
#ifndef VP9_DECODER_VP9_ONYXD_INT_H_
#define VP9_DECODER_VP9_ONYXD_INT_H_
-
- #include "vpx_ports/config.h"
+ #include "./vpx_config.h"
-#include "vp9/common/vp9_onyxd.h"
+#include "vp9/decoder/vp9_onyxd.h"
#include "vp9/decoder/vp9_treereader.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/decoder/vp9_dequantize.h"
}
}
}
+
+#if CONFIG_SUPERBLOCKS64
+static void update_sb64_skip_coeff_state(VP9_COMP *cpi,
+ ENTROPY_CONTEXT_PLANES ta[16],
+ ENTROPY_CONTEXT_PLANES tl[16],
+ TOKENEXTRA *t[16],
+ TOKENEXTRA **tp,
+ int skip[16], int output_enabled) {
+ MACROBLOCK *const x = &cpi->mb;
+ // Fix-up pass over the tokens between t[0] and *tp, needed only when some (not all) skip[] entries are set.
+ if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_32X32) {
+ TOKENEXTRA tokens[4][1024+512];
+ int n_tokens[4], n;
+
+ // Early out: none of the four skip[] entries is set, so nothing to fix.
+ if (!skip[0] && !skip[1] && !skip[2] && !skip[3])
+ return;
+
+ // Early out: coefficient skipping is off for this frame
+ // (cpi->common.mb_no_coeff_skip == 0), so nothing to fix.
+ if (!cpi->common.mb_no_coeff_skip)
+ return;
+
+ // Early out: all four skip[] entries are set, so nothing to be done.
+ if (skip[0] && skip[1] && skip[2] && skip[3])
+ return;
+
+ // so the situation now is that we want to skip coeffs
+ // for some MBs, but not all, and we didn't code EOB
+ // coefficients for them. However, the skip flag for this
+ // SB will be 0 overall, so we need to insert EOBs in the
+ // middle of the token tree. Do so here.
+ for (n = 0; n < 4; n++) {
+ if (n < 3) {
+ n_tokens[n] = t[n + 1] - t[n];
+ } else {
+ n_tokens[n] = *tp - t[3];
+ }
+ if (n_tokens[n]) {
+ memcpy(tokens[n], t[n], n_tokens[n] * sizeof(*t[0]));
+ }
+ }
+
+ // rewind *tp to t[0]; call vp9_stuff_sb() for skipped entries, copy saved tokens back for the rest
+ *tp = t[0];
+ for (n = 0; n < 4; n++) {
+ if (skip[n]) {
+ x->e_mbd.above_context = &ta[n * 2];
+ x->e_mbd.left_context = &tl[n * 2];
+ vp9_stuff_sb(cpi, &x->e_mbd, tp, !output_enabled);
+ } else {
+ if (n_tokens[n]) {
+ memcpy(*tp, tokens[n], sizeof(*t[0]) * n_tokens[n]);
+ }
+ (*tp) += n_tokens[n];
+ }
+ }
+ } else {
+ TOKENEXTRA tokens[16][16 * 25];
+ int n_tokens[16], n;
+
+ // Early out: none of the sixteen skip[] entries is set, so nothing to fix.
+ if (!skip[ 0] && !skip[ 1] && !skip[ 2] && !skip[ 3] &&
+ !skip[ 4] && !skip[ 5] && !skip[ 6] && !skip[ 7] &&
+ !skip[ 8] && !skip[ 9] && !skip[10] && !skip[11] &&
+ !skip[12] && !skip[13] && !skip[14] && !skip[15])
+ return;
+
+ // Early out: coefficient skipping is off for this frame
+ // (cpi->common.mb_no_coeff_skip == 0), so nothing to fix.
+ if (!cpi->common.mb_no_coeff_skip)
+ return;
+
+ // Early out: all 16 MBs skipped coeff coding, so nothing to be done.
+ if (skip[ 0] && skip[ 1] && skip[ 2] && skip[ 3] &&
+ skip[ 4] && skip[ 5] && skip[ 6] && skip[ 7] &&
+ skip[ 8] && skip[ 9] && skip[10] && skip[11] &&
+ skip[12] && skip[13] && skip[14] && skip[15])
+ return;
+
+ // so the situation now is that we want to skip coeffs
+ // for some MBs, but not all, and we didn't code EOB
+ // coefficients for them. However, the skip flag for this
+ // SB will be 0 overall, so we need to insert EOBs in the
+ // middle of the token tree. Do so here.
+ for (n = 0; n < 16; n++) {
+ if (n < 15) {
+ n_tokens[n] = t[n + 1] - t[n];
+ } else {
+ n_tokens[n] = *tp - t[15];
+ }
+ if (n_tokens[n]) {
+ memcpy(tokens[n], t[n], n_tokens[n] * sizeof(*t[0]));
+ }
+ }
+
+ // rewind *tp to t[0]; call vp9_stuff_mb() for skipped entries, copy saved tokens back for the rest
+ *tp = t[0];
+ for (n = 0; n < 16; n++) {
+ if (skip[n]) {
+ x->e_mbd.above_context = &ta[n];
+ x->e_mbd.left_context = &tl[n];
+ vp9_stuff_mb(cpi, &x->e_mbd, tp, !output_enabled);
+ } else {
+ if (n_tokens[n]) {
+ memcpy(*tp, tokens[n], sizeof(*t[0]) * n_tokens[n]);
+ }
+ (*tp) += n_tokens[n];
+ }
+ }
+ }
+}
+#endif // CONFIG_SUPERBLOCKS64
#endif /* CONFIG_SUPERBLOCKS */
-static void encode_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
- TOKENEXTRA **t, int recon_yoffset,
- int recon_uvoffset, int output_enabled,
- int mb_col, int mb_row) {
- VP9_COMMON *cm = &cpi->common;
+static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
+ int recon_yoffset, int recon_uvoffset,
+ int output_enabled,
+ int mb_row, int mb_col) {
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
+ MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
- unsigned char *segment_id = &mbmi->segment_id;
- int seg_ref_active;
unsigned char ref_pred_flag;
- x->skip = 0;
#if CONFIG_SUPERBLOCKS
- assert(!xd->mode_info_context->mbmi.encoded_as_sb);
+ assert(!xd->mode_info_context->mbmi.sb_type);
#endif
#ifdef ENC_DEBUG
void vp9_yv12_copy_partial_frame_c(YV12_BUFFER_CONFIG *src_ybc,
YV12_BUFFER_CONFIG *dst_ybc, int Fraction) {
- unsigned char *src_y, *dst_y;
+ uint8_t *src_y, *dst_y;
int yheight;
int ystride;
- int border;
int yoffset;
int linestocopy;
#if CONFIG_COMP_INTRA_PRED
0,
#endif
- 0);
+ cpi->update_context);
rate2 += rate;
+ rate2 += intra_cost_penalty;
distortion2 += distortion;
if (tmp_rd < best_yrd) {
#include <stdlib.h>
#include "vp9/common/vp9_sadmxn.h"
- #include "vpx_ports/config.h"
+ #include "./vpx_config.h"
#include "vpx/vpx_integer.h"
-unsigned int vp9_sad32x32_c(const unsigned char *src_ptr,
+unsigned int vp9_sad64x64_c(const uint8_t *src_ptr,
int src_stride,
- const unsigned char *ref_ptr,
+ const uint8_t *ref_ptr,
+ int ref_stride,
+ int max_sad) {
+ return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 64, 64);
+}
+
+unsigned int vp9_sad32x32_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t *ref_ptr,
int ref_stride,
int max_sad) {
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 32, 32);
#if ALT_REF_MC_ENABLED
-static int temporal_filter_find_matching_mb_c
-(
- VP9_COMP *cpi,
- YV12_BUFFER_CONFIG *arf_frame,
- YV12_BUFFER_CONFIG *frame_ptr,
- int mb_offset,
- int error_thresh
-) {
+static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
+ YV12_BUFFER_CONFIG *arf_frame,
+ YV12_BUFFER_CONFIG *frame_ptr,
+ int mb_offset,
+ int error_thresh) {
MACROBLOCK *x = &cpi->mb;
int step_param;
- int further_steps;
int sadpb = x->sadperbit16;
int bestsme = INT_MAX;