From: John Koleszar Date: Tue, 8 Jan 2013 18:11:26 +0000 (-0800) Subject: Merge vp9-preview changes into experimental branch X-Git-Tag: v1.3.0~1210^2~27 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=879cb7d96259a71eea0038452a00241650589084;p=libvpx Merge vp9-preview changes into experimental branch Incorporate vp9-preview changes by merging master branch into experimental. Conflicts: test/test.mk vp9/common/vp9_filter.c vp9/common/vp9_idctllm.c vp9/common/vp9_invtrans.h vp9/common/vp9_mbpitch.c vp9/common/vp9_rtcd_defs.sh vp9/common/vp9_systemdependent.h vp9/common/vp9_type_aliases.h vp9/common/x86/vp9_asm_stubs.c vp9/common/x86/vp9_subpixel_mmx.asm vp9/decoder/vp9_decodframe.c vp9/decoder/vp9_dequantize.c vp9/decoder/vp9_dequantize.h vp9/decoder/vp9_onyxd_int.h vp9/encoder/vp9_bitstream.c vp9/encoder/vp9_encodeframe.c vp9/encoder/vp9_rdopt.c Change-Id: I17f51c3666d1b59cf1a699f87607cbc5d30a87c5 --- 879cb7d96259a71eea0038452a00241650589084 diff --cc test/test.mk index 919cf0438,cb15fcef8..28d387264 --- a/test/test.mk +++ b/test/test.mk @@@ -64,11 -68,7 +68,10 @@@ endi LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc #LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc + LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc +ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_TX32X32),yesyes) +LIBVPX_TEST_SRCS-yes += dct32x32_test.cc +endif - LIBVPX_TEST_SRCS-yes += idct8x8_test.cc - LIBVPX_TEST_SRCS-yes += variance_test.cc endif # VP9 diff --cc vp9/common/vp9_idctllm.c index 6cbc25967,893f378b5..4dd540e2a --- a/vp9/common/vp9_idctllm.c +++ b/vp9/common/vp9_idctllm.c @@@ -24,10 -24,10 +24,10 @@@ **************************************************************************/ #include #include - #include "vpx_ports/config.h" + #include "./vpx_config.h" #include "vp9/common/vp9_systemdependent.h" - #include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_common.h" static const int 
cospi8sqrt2minus1 = 20091; static const int sinpi8sqrt2 = 35468; @@@ -279,133 -157,16 +157,16 @@@ static const int16_t iadst_i16[256] = 3936, -3526, 3084, -2614, 2120, -1607, 1080, -542 }; - void vp9_ihtllm_float_c(const int16_t *input, int16_t *output, int pitch, - TX_TYPE tx_type, int tx_dim) { - vp9_clear_system_state(); // Make it simd safe : __asm emms; - { - int i, j, k; - float bufa[256], bufb[256]; // buffers are for floating-point test purpose - // the implementation could be simplified in - // conjunction with integer transform - const int16_t *ip = input; - int16_t *op = output; - int shortpitch = pitch >> 1; - - float *pfa = &bufa[0]; - float *pfb = &bufb[0]; - - // pointers to vertical and horizontal transforms - const float *ptv, *pth; - - assert(tx_type != DCT_DCT); - // load and convert residual array into floating-point - for(j = 0; j < tx_dim; j++) { - for(i = 0; i < tx_dim; i++) { - pfa[i] = (float)ip[i]; - } - pfa += tx_dim; - ip += tx_dim; - } - - // vertical transformation - pfa = &bufa[0]; - pfb = &bufb[0]; - - switch(tx_type) { - case ADST_ADST : - case ADST_DCT : - ptv = (tx_dim == 4) ? &iadst_4[0] : - ((tx_dim == 8) ? &iadst_8[0] : &iadst_16[0]); - break; - - default : - ptv = (tx_dim == 4) ? &idct_4[0] : - ((tx_dim == 8) ? &idct_8[0] : &idct_16[0]); - break; - } - - for(j = 0; j < tx_dim; j++) { - for(i = 0; i < tx_dim; i++) { - pfb[i] = 0 ; - for(k = 0; k < tx_dim; k++) { - pfb[i] += ptv[k] * pfa[(k * tx_dim)]; - } - pfa += 1; - } - - pfb += tx_dim; - ptv += tx_dim; - pfa = &bufa[0]; - } - - // horizontal transformation - pfa = &bufa[0]; - pfb = &bufb[0]; - - switch(tx_type) { - case ADST_ADST : - case DCT_ADST : - pth = (tx_dim == 4) ? &iadst_4[0] : - ((tx_dim == 8) ? &iadst_8[0] : &iadst_16[0]); - break; - - default : - pth = (tx_dim == 4) ? &idct_4[0] : - ((tx_dim == 8) ? 
&idct_8[0] : &idct_16[0]); - break; - } - - for(j = 0; j < tx_dim; j++) { - for(i = 0; i < tx_dim; i++) { - pfa[i] = 0; - for(k = 0; k < tx_dim; k++) { - pfa[i] += pfb[k] * pth[k]; - } - pth += tx_dim; - } - - pfa += tx_dim; - pfb += tx_dim; - - switch(tx_type) { - case ADST_ADST : - case DCT_ADST : - pth = (tx_dim == 4) ? &iadst_4[0] : - ((tx_dim == 8) ? &iadst_8[0] : &iadst_16[0]); - break; - - default : - pth = (tx_dim == 4) ? &idct_4[0] : - ((tx_dim == 8) ? &idct_8[0] : &idct_16[0]); - break; - } - } - - // convert to short integer format and load BLOCKD buffer - op = output; - pfa = &bufa[0]; - - for(j = 0; j < tx_dim; j++) { - for(i = 0; i < tx_dim; i++) { - op[i] = (pfa[i] > 0 ) ? (int16_t)( pfa[i] / 8 + 0.49) : - -(int16_t)( - pfa[i] / 8 + 0.49); - } - - op += shortpitch; - pfa += tx_dim; - } - } - vp9_clear_system_state(); // Make it simd safe : __asm emms; - } /* Converted the transforms to integer form. */ -#define VERTICAL_SHIFT 14 // 16 -#define VERTICAL_ROUNDING ((1 << (VERTICAL_SHIFT - 1)) - 1) -#define HORIZONTAL_SHIFT 17 // 15 +#define HORIZONTAL_SHIFT 14 // 16 #define HORIZONTAL_ROUNDING ((1 << (HORIZONTAL_SHIFT - 1)) - 1) +#define VERTICAL_SHIFT 17 // 15 +#define VERTICAL_ROUNDING ((1 << (VERTICAL_SHIFT - 1)) - 1) void vp9_ihtllm_c(const int16_t *input, int16_t *output, int pitch, - TX_TYPE tx_type, int tx_dim) { + TX_TYPE tx_type, int tx_dim, uint16_t eobs) { int i, j, k; + int nz_dim; int16_t imbuf[256]; const int16_t *ip = input; @@@ -444,47 -205,54 +205,60 @@@ break; } + nz_dim = tx_dim; + if(tx_dim > 4) { + if(eobs < 36) { + vpx_memset(im, 0, 512); + nz_dim = 8; + if(eobs < 3) { + nz_dim = 2; + } else if(eobs < 10) { + nz_dim = 4; + } + } + } + - /* vertical transformation */ + /* 2-D inverse transform X = M1*Z*Transposed_M2 is calculated in 2 steps + * from right to left: + * 1. horizontal transform: Y= Z*Transposed_M2 + * 2. 
vertical transform: X = M1*Y + * In SIMD, doing this way could eliminate the transpose needed if it is + * calculated from left to right. + */ + /* Horizontal transformation */ for (j = 0; j < tx_dim; j++) { - for (i = 0; i < tx_dim; i++) { + for (i = 0; i < nz_dim; i++) { int temp = 0; - for (k = 0; k < tx_dim; k++) { + for (k = 0; k < nz_dim; k++) { - temp += ptv[k] * ip[(k * tx_dim)]; + temp += ip[k] * pth[k]; } - im[i] = (int16_t)((temp + VERTICAL_ROUNDING) >> VERTICAL_SHIFT); - ip++; + /* Calculate im and store it in its transposed position. */ + im[i] = (int16_t)((temp + HORIZONTAL_ROUNDING) >> HORIZONTAL_SHIFT); + ip += tx_dim; } - im += tx_dim; // 16 - ptv += tx_dim; + im += tx_dim; + pth += tx_dim; ip = input; } - /* horizontal transformation */ + /* Vertical transformation */ im = &imbuf[0]; - for (j = 0; j < tx_dim; j++) { - const int16_t *pthc = pth; - - for (i = 0; i < tx_dim; i++) { + for (i = 0; i < tx_dim; i++) { + for (j = 0; j < tx_dim; j++) { int temp = 0; - for (k = 0; k < tx_dim; k++) { + for (k = 0; k < nz_dim; k++) { - temp += im[k] * pthc[k]; + temp += ptv[k] * im[k]; } - op[i] = (int16_t)((temp + HORIZONTAL_ROUNDING) >> HORIZONTAL_SHIFT); - pthc += tx_dim; + op[j] = (int16_t)((temp + VERTICAL_ROUNDING) >> VERTICAL_SHIFT); + im += tx_dim; } - - im += tx_dim; // 16 + im = &imbuf[0]; + ptv += tx_dim; op += shortpitch; } } diff --cc vp9/common/vp9_invtrans.h index 586a3dc4b,4474ba477..3cfb45fed --- a/vp9/common/vp9_invtrans.h +++ b/vp9/common/vp9_invtrans.h @@@ -11,8 -11,7 +11,8 @@@ #ifndef VP9_COMMON_VP9_INVTRANS_H_ #define VP9_COMMON_VP9_INVTRANS_H_ - #include "vpx_ports/config.h" + #include "./vpx_config.h" +#include "vpx/vpx_integer.h" #include "vp9/common/vp9_blockd.h" extern void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch); diff --cc vp9/common/vp9_mbpitch.c index 31162655d,1107402ea..e94144813 --- a/vp9/common/vp9_mbpitch.c +++ b/vp9/common/vp9_mbpitch.c @@@ -38,8 -43,9 +38,8 @@@ static void setup_block(BLOCKD *b 
static void setup_macroblock(MACROBLOCKD *xd, BLOCKSET bs) { int block; - unsigned char **y, **u, **v; - unsigned char **y2 = NULL, **u2 = NULL, **v2 = NULL; + uint8_t **y, **u, **v; - uint8_t **y2, **u2, **v2; ++ uint8_t **y2 = NULL, **u2 = NULL, **v2 = NULL; BLOCKD *blockd = xd->block; int stride; diff --cc vp9/common/vp9_rtcd_defs.sh index f02ee0260,6af7b3bad..95253ef67 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@@ -47,35 -45,35 +47,35 @@@ specialize vp9_dequantize_ prototype void vp9_dequantize_b_2x2 "struct blockd *x" specialize vp9_dequantize_b_2x2 -prototype void vp9_dequant_dc_idct_add_y_block_8x8 "short *q, const short *dq, unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs, const short *dc, struct macroblockd *xd" +prototype void vp9_dequant_dc_idct_add_y_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs, const int16_t *dc, struct macroblockd *xd" specialize vp9_dequant_dc_idct_add_y_block_8x8 -prototype void vp9_dequant_idct_add_y_block_8x8 "short *q, const short *dq, unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs, struct macroblockd *xd" +prototype void vp9_dequant_idct_add_y_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs, struct macroblockd *xd" specialize vp9_dequant_idct_add_y_block_8x8 -prototype void vp9_dequant_idct_add_uv_block_8x8 "short *q, const short *dq, unsigned char *pre, unsigned char *dstu, unsigned char *dstv, int stride, unsigned short *eobs, struct macroblockd *xd" +prototype void vp9_dequant_idct_add_uv_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, int stride, uint16_t *eobs, struct macroblockd *xd" specialize vp9_dequant_idct_add_uv_block_8x8 -prototype void vp9_dequant_idct_add_16x16 "short *input, const short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, unsigned short eobs" +prototype void 
vp9_dequant_idct_add_16x16 "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob" specialize vp9_dequant_idct_add_16x16 -prototype void vp9_dequant_idct_add_8x8 "short *input, const short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int dc, unsigned short eobs" +prototype void vp9_dequant_idct_add_8x8 "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int dc, int eob" specialize vp9_dequant_idct_add_8x8 -prototype void vp9_dequant_idct_add "short *input, const short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride" +prototype void vp9_dequant_idct_add "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride" specialize vp9_dequant_idct_add -prototype void vp9_dequant_dc_idct_add "short *input, const short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc" +prototype void vp9_dequant_dc_idct_add "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int dc" specialize vp9_dequant_dc_idct_add -prototype void vp9_dequant_dc_idct_add_y_block "short *q, const short *dq, unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs, const short *dc" +prototype void vp9_dequant_dc_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs, const int16_t *dcs" - specialize vp9_dequant_dc_idct_add_y_block mmx + specialize vp9_dequant_dc_idct_add_y_block -prototype void vp9_dequant_idct_add_y_block "short *q, const short *dq, unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs" +prototype void vp9_dequant_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs" - specialize vp9_dequant_idct_add_y_block mmx + specialize vp9_dequant_idct_add_y_block -prototype void vp9_dequant_idct_add_uv_block "short *q, const short *dq, unsigned char 
*pre, unsigned char *dstu, unsigned char *dstv, int stride, unsigned short *eobs" +prototype void vp9_dequant_idct_add_uv_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, int stride, uint16_t *eobs" - specialize vp9_dequant_idct_add_uv_block mmx + specialize vp9_dequant_idct_add_uv_block # # RECON @@@ -218,7 -216,8 +218,8 @@@ vp9_loop_filter_simple_bh_sse2=vp9_loop # # post proc # + if [ "$CONFIG_POSTPROC" = "yes" ]; then -prototype void vp9_mbpost_proc_down "unsigned char *dst, int pitch, int rows, int cols, int flimit" +prototype void vp9_mbpost_proc_down "uint8_t *dst, int pitch, int rows, int cols, int flimit" specialize vp9_mbpost_proc_down mmx sse2 vp9_mbpost_proc_down_sse2=vp9_mbpost_proc_down_xmm @@@ -230,17 -229,18 +231,18 @@@ prototype void vp9_post_proc_down_and_a specialize vp9_post_proc_down_and_across mmx sse2 vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm -prototype void vp9_plane_add_noise "unsigned char *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch" +prototype void vp9_plane_add_noise "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch" specialize vp9_plane_add_noise mmx sse2 vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt + fi -prototype void vp9_blend_mb_inner "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride" +prototype void vp9_blend_mb_inner "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride" specialize vp9_blend_mb_inner -prototype void vp9_blend_mb_outer "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride" +prototype void vp9_blend_mb_outer "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride" specialize vp9_blend_mb_outer -prototype void vp9_blend_b 
"unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride" +prototype void vp9_blend_b "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride" specialize vp9_blend_b # @@@ -342,31 -342,28 +344,31 @@@ specialize vp9_bilinear_predict_avg4x # # dct # -prototype void vp9_short_idct4x4llm_1 "short *input, short *output, int pitch" +prototype void vp9_short_idct4x4llm_1 "int16_t *input, int16_t *output, int pitch" - specialize vp9_short_idct4x4llm_1 mmx + specialize vp9_short_idct4x4llm_1 -prototype void vp9_short_idct4x4llm "short *input, short *output, int pitch" +prototype void vp9_short_idct4x4llm "int16_t *input, int16_t *output, int pitch" - specialize vp9_short_idct4x4llm mmx + specialize vp9_short_idct4x4llm -prototype void vp9_short_idct8x8 "short *input, short *output, int pitch" +prototype void vp9_short_idct8x8 "int16_t *input, int16_t *output, int pitch" specialize vp9_short_idct8x8 -prototype void vp9_short_idct10_8x8 "short *input, short *output, int pitch" +prototype void vp9_short_idct10_8x8 "int16_t *input, int16_t *output, int pitch" specialize vp9_short_idct10_8x8 -prototype void vp9_short_ihaar2x2 "short *input, short *output, int pitch" +prototype void vp9_short_ihaar2x2 "int16_t *input, int16_t *output, int pitch" specialize vp9_short_ihaar2x2 -prototype void vp9_short_idct16x16 "short *input, short *output, int pitch" +prototype void vp9_short_idct16x16 "int16_t *input, int16_t *output, int pitch" specialize vp9_short_idct16x16 -prototype void vp9_short_idct10_16x16 "short *input, short *output, int pitch" +prototype void vp9_short_idct10_16x16 "int16_t *input, int16_t *output, int pitch" specialize vp9_short_idct10_16x16 -prototype void vp9_ihtllm "const short *input, short *output, int pitch, int tx_type, int tx_dim, short eobs" +prototype void vp9_short_idct32x32 "int16_t *input, int16_t *output, int pitch" +specialize vp9_short_idct32x32 + - prototype void vp9_ihtllm "const 
int16_t *input, int16_t *output, int pitch, int tx_type, int tx_dim" ++prototype void vp9_ihtllm "const int16_t *input, int16_t *output, int pitch, int tx_type, int tx_dim, int16_t eobs" specialize vp9_ihtllm # diff --cc vp9/common/vp9_systemdependent.h index d57a42df3,6f08e6906..1b9147ef4 --- a/vp9/common/vp9_systemdependent.h +++ b/vp9/common/vp9_systemdependent.h @@@ -11,11 -10,7 +11,11 @@@ #ifndef VP9_COMMON_VP9_SYSTEMDEPENDENT_H_ #define VP9_COMMON_VP9_SYSTEMDEPENDENT_H_ +#ifdef _MSC_VER +#include +#endif + - #include "vpx_ports/config.h" + #include "./vpx_config.h" #if ARCH_X86 || ARCH_X86_64 void vpx_reset_mmx_state(void); #define vp9_clear_system_state() vpx_reset_mmx_state() diff --cc vp9/common/x86/vp9_asm_stubs.c index 0d268a264,de1f0fa32..f09e2d78b --- a/vp9/common/x86/vp9_asm_stubs.c +++ b/vp9/common/x86/vp9_asm_stubs.c @@@ -13,8 -13,10 +13,8 @@@ #include "vpx_ports/mem.h" #include "vp9/common/vp9_subpixel.h" - extern const short vp9_six_tap_mmx[16][6 * 8]; + extern const short vp9_six_tap_mmx[8][6 * 8]; -extern const short vp9_bilinear_filters_8x_mmx[8][2 * 8]; - extern void vp9_filter_block1d_h6_mmx(unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, diff --cc vp9/decoder/vp9_dboolhuff.h index c8c5c3b01,635bd5b7d..5afdd67c8 --- a/vp9/decoder/vp9_dboolhuff.h +++ b/vp9/decoder/vp9_dboolhuff.h @@@ -8,12 -8,12 +8,12 @@@ * be found in the AUTHORS file in the root of the source tree. 
*/ - #ifndef VP9_DECODER_VP9_DBOOLHUFF_H_ #define VP9_DECODER_VP9_DBOOLHUFF_H_ + #include #include - #include "vpx_ports/config.h" + #include "./vpx_config.h" #include "vpx_ports/mem.h" #include "vpx/vpx_integer.h" diff --cc vp9/decoder/vp9_dequantize.c index 4376dc3d3,39a2de14b..72cd2771e --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@@ -13,8 -13,6 +13,7 @@@ #include "vp9/decoder/vp9_dequantize.h" #include "vpx_mem/vpx_mem.h" #include "vp9/decoder/vp9_onyxd_int.h" +#include "vp9/common/vp9_common.h" - static void add_residual(const int16_t *diff, const uint8_t *pred, int pitch, uint8_t *dest, int stride, int width, int height) { int r, c; diff --cc vp9/decoder/vp9_dequantize.h index c578608ba,f348b21b0..bbbc173a2 --- a/vp9/decoder/vp9_dequantize.h +++ b/vp9/decoder/vp9_dequantize.h @@@ -14,105 -14,90 +14,90 @@@ #include "vp9/common/vp9_blockd.h" #if CONFIG_LOSSLESS - extern void vp9_dequant_idct_add_lossless_c(int16_t *input, - const int16_t *dq, - uint8_t *pred, - uint8_t *output, -extern void vp9_dequant_idct_add_lossless_c(short *input, const short *dq, ++extern void vp9_dequant_idct_add_lossless_c(int16_t *input, const int16_t *dq, + unsigned char *pred, + unsigned char *output, int pitch, int stride); - extern void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, - const int16_t *dq, - uint8_t *pred, - uint8_t *output, -extern void vp9_dequant_dc_idct_add_lossless_c(short *input, const short *dq, ++extern void vp9_dequant_dc_idct_add_lossless_c(int16_t *input, const int16_t *dq, + unsigned char *pred, + unsigned char *output, int pitch, int stride, int dc); -extern void vp9_dequant_dc_idct_add_y_block_lossless_c(short *q, - const short *dq, +extern void vp9_dequant_dc_idct_add_y_block_lossless_c(int16_t *q, + const int16_t *dq, - uint8_t *pre, - uint8_t *dst, + unsigned char *pre, + unsigned char *dst, int stride, - unsigned short *eobs, - const short *dc); -extern void vp9_dequant_idct_add_y_block_lossless_c(short *q, const short 
*dq, + uint16_t *eobs, + const int16_t *dc); - extern void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, - const int16_t *dq, - uint8_t *pre, - uint8_t *dst, ++extern void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, + unsigned char *pre, + unsigned char *dst, int stride, - unsigned short *eobs); -extern void vp9_dequant_idct_add_uv_block_lossless_c(short *q, const short *dq, + uint16_t *eobs); - extern void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, - const int16_t *dq, - uint8_t *pre, - uint8_t *dst_u, - uint8_t *dst_v, ++extern void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq, + unsigned char *pre, + unsigned char *dst_u, + unsigned char *dst_v, int stride, - unsigned short *eobs); + uint16_t *eobs); - #endif // CONFIG_LOSSLESS + #endif -typedef void (*vp9_dequant_idct_add_fn_t)(short *input, const short *dq, +typedef void (*vp9_dequant_idct_add_fn_t)(int16_t *input, const int16_t *dq, - uint8_t *pred, uint8_t *output, - int pitch, int stride); + unsigned char *pred, unsigned char *output, int pitch, int stride); -typedef void(*vp9_dequant_dc_idct_add_fn_t)(short *input, const short *dq, +typedef void(*vp9_dequant_dc_idct_add_fn_t)(int16_t *input, const int16_t *dq, - uint8_t *pred, uint8_t *output, - int pitch, int stride, int dc); + unsigned char *pred, unsigned char *output, int pitch, int stride, int dc); - typedef void(*vp9_dequant_dc_idct_add_y_block_fn_t)(int16_t *q, - const int16_t *dq, - uint8_t *pre, uint8_t *dst, - int stride, uint16_t *eobs, - const int16_t *dc); -typedef void(*vp9_dequant_dc_idct_add_y_block_fn_t)(short *q, const short *dq, - unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs, - const short *dc); -typedef void(*vp9_dequant_idct_add_y_block_fn_t)(short *q, const short *dq, - unsigned char *pre, unsigned char *dst, int stride, unsigned short *eobs); -typedef void(*vp9_dequant_idct_add_uv_block_fn_t)(short *q, const short *dq, ++typedef 
void(*vp9_dequant_dc_idct_add_y_block_fn_t)(int16_t *q, const int16_t *dq, ++ unsigned char *pre, unsigned char *dst, int stride, uint16_t *eobs, ++ const int16_t *dc); +typedef void(*vp9_dequant_idct_add_y_block_fn_t)(int16_t *q, const int16_t *dq, - uint8_t *pre, uint8_t *dst, - int stride, uint16_t *eobs); ++ unsigned char *pre, unsigned char *dst, int stride, uint16_t *eobs); +typedef void(*vp9_dequant_idct_add_uv_block_fn_t)(int16_t *q, const int16_t *dq, - uint8_t *pre, uint8_t *dst_u, - uint8_t *dst_v, int stride, - uint16_t *eobs); + unsigned char *pre, unsigned char *dst_u, unsigned char *dst_v, int stride, - unsigned short *eobs); ++ uint16_t *eobs); - void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, - const int16_t *dq, - uint8_t *pred, uint8_t *dest, - int pitch, int stride); -void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, const short *dq, ++void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq, + unsigned char *pred, unsigned char *dest, + int pitch, int stride, uint16_t eobs); -void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, - const short *dq, unsigned char *pred, +void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input, - const int16_t *dq, uint8_t *pred, - uint8_t *dest, int pitch, int stride); ++ const int16_t *dq, unsigned char *pred, + unsigned char *dest, int pitch, int stride, + uint16_t eobs); -void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input, - const short *dq, unsigned char *pred, +void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input, - const int16_t *dq, uint8_t *pred, - uint8_t *dest, - int pitch, int stride); ++ const int16_t *dq, unsigned char *pred, + unsigned char *dest, + int pitch, int stride, uint16_t eobs); #if CONFIG_SUPERBLOCKS - void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(int16_t *q, - const int16_t *dq, - uint8_t *dst, -void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(short *q, const short *dq, 
++void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(int16_t *q, const int16_t *dq, + unsigned char *dst, int stride, - unsigned short *eobs, - const short *dc, + uint16_t *eobs, + const int16_t *dc, MACROBLOCKD *xd); - void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q, - const int16_t *dq, - uint8_t *dst, -void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(short *q, const short *dq, ++void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q, const int16_t *dq, + unsigned char *dst, int stride, - unsigned short *eobs, - const short *dc, + uint16_t *eobs, + const int16_t *dc, MACROBLOCKD *xd); - void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q, - const int16_t *dq, - uint8_t *dstu, - uint8_t *dstv, -void vp9_dequant_idct_add_uv_block_8x8_inplace_c(short *q, const short *dq, ++void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q, const int16_t *dq, + unsigned char *dstu, + unsigned char *dstv, int stride, - unsigned short *eobs, + uint16_t *eobs, MACROBLOCKD *xd); - void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, - const int16_t *dq, - uint8_t *dstu, - uint8_t *dstv, -void vp9_dequant_idct_add_uv_block_4x4_inplace_c(short *q, const short *dq, ++void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq, + unsigned char *dstu, + unsigned char *dstv, int stride, - unsigned short *eobs, + uint16_t *eobs, MACROBLOCKD *xd); - #endif // CONFIG_SUPERBLOCKS + #endif - #endif // VP9_DECODER_VP9_DEQUANTIZE_H_ + #endif diff --cc vp9/decoder/vp9_onyxd_int.h index 6b7184fbe,49e13f7f4..64975468d --- a/vp9/decoder/vp9_onyxd_int.h +++ b/vp9/decoder/vp9_onyxd_int.h @@@ -8,11 -8,11 +8,10 @@@ * be found in the AUTHORS file in the root of the source tree. 
*/ - #ifndef VP9_DECODER_VP9_ONYXD_INT_H_ #define VP9_DECODER_VP9_ONYXD_INT_H_ - - #include "vpx_ports/config.h" + #include "./vpx_config.h" -#include "vp9/common/vp9_onyxd.h" +#include "vp9/decoder/vp9_onyxd.h" #include "vp9/decoder/vp9_treereader.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/decoder/vp9_dequantize.h" diff --cc vp9/encoder/vp9_encodeframe.c index 702c35831,bd1966272..509c426d8 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@@ -2000,135 -2022,20 +2000,133 @@@ static void update_sb_skip_coeff_state( } } } + +#if CONFIG_SUPERBLOCKS64 +static void update_sb64_skip_coeff_state(VP9_COMP *cpi, + ENTROPY_CONTEXT_PLANES ta[16], + ENTROPY_CONTEXT_PLANES tl[16], + TOKENEXTRA *t[16], + TOKENEXTRA **tp, + int skip[16], int output_enabled) { + MACROBLOCK *const x = &cpi->mb; + + if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_32X32) { + TOKENEXTRA tokens[4][1024+512]; + int n_tokens[4], n; + + // if there were no skips, we don't need to do anything + if (!skip[0] && !skip[1] && !skip[2] && !skip[3]) + return; + + // if we don't do coeff skipping for this frame, we don't + // need to do anything here + if (!cpi->common.mb_no_coeff_skip) + return; + + // if all 4 MBs skipped coeff coding, nothing to be done + if (skip[0] && skip[1] && skip[2] && skip[3]) + return; + + // so the situation now is that we want to skip coeffs + // for some MBs, but not all, and we didn't code EOB + // coefficients for them. However, the skip flag for this + // SB will be 0 overall, so we need to insert EOBs in the + // middle of the token tree. Do so here. 
+ for (n = 0; n < 4; n++) { + if (n < 3) { + n_tokens[n] = t[n + 1] - t[n]; + } else { + n_tokens[n] = *tp - t[3]; + } + if (n_tokens[n]) { + memcpy(tokens[n], t[n], n_tokens[n] * sizeof(*t[0])); + } + } + + // reset pointer, stuff EOBs where necessary + *tp = t[0]; + for (n = 0; n < 4; n++) { + if (skip[n]) { + x->e_mbd.above_context = &ta[n * 2]; + x->e_mbd.left_context = &tl[n * 2]; + vp9_stuff_sb(cpi, &x->e_mbd, tp, !output_enabled); + } else { + if (n_tokens[n]) { + memcpy(*tp, tokens[n], sizeof(*t[0]) * n_tokens[n]); + } + (*tp) += n_tokens[n]; + } + } + } else { + TOKENEXTRA tokens[16][16 * 25]; + int n_tokens[16], n; + + // if there were no skips, we don't need to do anything + if (!skip[ 0] && !skip[ 1] && !skip[ 2] && !skip[ 3] && + !skip[ 4] && !skip[ 5] && !skip[ 6] && !skip[ 7] && + !skip[ 8] && !skip[ 9] && !skip[10] && !skip[11] && + !skip[12] && !skip[13] && !skip[14] && !skip[15]) + return; + + // if we don't do coeff skipping for this frame, we don't + // need to do anything here + if (!cpi->common.mb_no_coeff_skip) + return; + + // if all 4 MBs skipped coeff coding, nothing to be done + if (skip[ 0] && skip[ 1] && skip[ 2] && skip[ 3] && + skip[ 4] && skip[ 5] && skip[ 6] && skip[ 7] && + skip[ 8] && skip[ 9] && skip[10] && skip[11] && + skip[12] && skip[13] && skip[14] && skip[15]) + return; + + // so the situation now is that we want to skip coeffs + // for some MBs, but not all, and we didn't code EOB + // coefficients for them. However, the skip flag for this + // SB will be 0 overall, so we need to insert EOBs in the + // middle of the token tree. Do so here. 
+ for (n = 0; n < 16; n++) { + if (n < 15) { + n_tokens[n] = t[n + 1] - t[n]; + } else { + n_tokens[n] = *tp - t[15]; + } + if (n_tokens[n]) { + memcpy(tokens[n], t[n], n_tokens[n] * sizeof(*t[0])); + } + } + + // reset pointer, stuff EOBs where necessary + *tp = t[0]; + for (n = 0; n < 16; n++) { + if (skip[n]) { + x->e_mbd.above_context = &ta[n]; + x->e_mbd.left_context = &tl[n]; + vp9_stuff_mb(cpi, &x->e_mbd, tp, !output_enabled); + } else { + if (n_tokens[n]) { + memcpy(*tp, tokens[n], sizeof(*t[0]) * n_tokens[n]); + } + (*tp) += n_tokens[n]; + } + } + } +} +#endif // CONFIG_SUPERBLOCKS64 #endif /* CONFIG_SUPERBLOCKS */ -static void encode_macroblock(VP9_COMP *cpi, MACROBLOCK *x, - TOKENEXTRA **t, int recon_yoffset, - int recon_uvoffset, int output_enabled, - int mb_col, int mb_row) { - VP9_COMMON *cm = &cpi->common; +static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, + int recon_yoffset, int recon_uvoffset, + int output_enabled, + int mb_row, int mb_col) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; + MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; - unsigned char *segment_id = &mbmi->segment_id; - int seg_ref_active; unsigned char ref_pred_flag; - x->skip = 0; #if CONFIG_SUPERBLOCKS - assert(!xd->mode_info_context->mbmi.encoded_as_sb); + assert(!xd->mode_info_context->mbmi.sb_type); #endif #ifdef ENC_DEBUG diff --cc vp9/encoder/vp9_picklpf.c index 7091c4932,4eb51df41..b443ede6f --- a/vp9/encoder/vp9_picklpf.c +++ b/vp9/encoder/vp9_picklpf.c @@@ -21,10 -21,9 +21,9 @@@ void vp9_yv12_copy_partial_frame_c(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction) { - unsigned char *src_y, *dst_y; + uint8_t *src_y, *dst_y; int yheight; int ystride; - int border; int yoffset; int linestocopy; diff --cc vp9/encoder/vp9_rdopt.c index 267dd0aa5,27decb91e..8e91d828f --- a/vp9/encoder/vp9_rdopt.c +++ 
b/vp9/encoder/vp9_rdopt.c @@@ -4127,8 -3625,9 +4131,9 @@@ static void rd_pick_inter_mode(VP9_COM #if CONFIG_COMP_INTRA_PRED 0, #endif - 0); + cpi->update_context); rate2 += rate; + rate2 += intra_cost_penalty; distortion2 += distortion; if (tmp_rd < best_yrd) { diff --cc vp9/encoder/vp9_sad_c.c index 9ce27fbed,465044278..84121f79c --- a/vp9/encoder/vp9_sad_c.c +++ b/vp9/encoder/vp9_sad_c.c @@@ -11,20 -11,12 +11,20 @@@ #include #include "vp9/common/vp9_sadmxn.h" - #include "vpx_ports/config.h" + #include "./vpx_config.h" #include "vpx/vpx_integer.h" -unsigned int vp9_sad32x32_c(const unsigned char *src_ptr, +unsigned int vp9_sad64x64_c(const uint8_t *src_ptr, int src_stride, - const unsigned char *ref_ptr, + const uint8_t *ref_ptr, + int ref_stride, + int max_sad) { + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 64, 64); +} + +unsigned int vp9_sad32x32_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, int ref_stride, int max_sad) { return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 32, 32); diff --cc vp9/encoder/vp9_temporal_filter.c index 159d6faa5,57253bd50..8bbe53486 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@@ -123,14 -129,16 +123,13 @@@ void vp9_temporal_filter_apply_c(uint8_ #if ALT_REF_MC_ENABLED -static int temporal_filter_find_matching_mb_c -( - VP9_COMP *cpi, - YV12_BUFFER_CONFIG *arf_frame, - YV12_BUFFER_CONFIG *frame_ptr, - int mb_offset, - int error_thresh -) { +static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, + YV12_BUFFER_CONFIG *arf_frame, + YV12_BUFFER_CONFIG *frame_ptr, + int mb_offset, + int error_thresh) { MACROBLOCK *x = &cpi->mb; int step_param; - int further_steps; int sadpb = x->sadperbit16; int bestsme = INT_MAX;