From 08f64718905efe994a207af95b1c276c104e4113 Mon Sep 17 00:00:00 2001 From: Deb Mukherjee Date: Mon, 14 Feb 2011 14:18:18 -0800 Subject: [PATCH] Add 8x8 transform to experimental branch Please refer to previous commit messages for detailed info: https://on2-git.corp.google.com/g/#change,5940 https://on2-git.corp.google.com/g/#change,6045 Change-Id: I8b16992f2f69c5a808ad40a3e32ef589cce7c59d --- configure | 1 + vp8/common/blockd.h | 16 +- vp8/common/coefupdateprobs.h | 173 +++++++ vp8/common/common.h | 3 +- vp8/common/defaultcoefcounts.c | 177 +++++++ vp8/common/defaultcoefcounts.h | 7 +- vp8/common/entropy.c | 56 ++- vp8/common/entropy.h | 16 +- vp8/common/generic/systemdependent.c | 8 +- vp8/common/idct.h | 37 +- vp8/common/idctllm.c | 318 +++++++++++- vp8/common/invtrans.c | 91 +++- vp8/common/invtrans.h | 7 + vp8/common/onyx.h | 1 + vp8/common/onyxc_int.h | 3 + vp8/common/recon.c | 5 +- vp8/common/reconinter.c | 4 - vp8/decoder/decodemv.c | 8 +- vp8/decoder/decodframe.c | 177 +++++-- vp8/decoder/dequantize.c | 228 ++++++++- vp8/decoder/dequantize.h | 66 +++ vp8/decoder/detokenize.c | 424 +++++++++++++++- vp8/decoder/detokenize.h | 3 + vp8/decoder/generic/dsystemdependent.c | 11 + vp8/decoder/idct_blk.c | 71 ++- vp8/decoder/onyxd_if.c | 60 ++- vp8/decoder/onyxd_int.h | 11 + vp8/encoder/bitstream.c | 213 +++++++- vp8/encoder/block.h | 8 +- vp8/encoder/dct.c | 128 ++++- vp8/encoder/dct.h | 18 + vp8/encoder/encodeframe.c | 295 ++++++++++- vp8/encoder/encodeintra.c | 122 ++++- vp8/encoder/encodemb.c | 671 ++++++++++++++++++++++++- vp8/encoder/encodemb.h | 12 + vp8/encoder/ethreading.c | 18 +- vp8/encoder/generic/csystemdependent.c | 10 + vp8/encoder/onyx_if.c | 108 +++- vp8/encoder/onyx_int.h | 14 +- vp8/encoder/quantize.c | 597 +++++++++++++++++++++- vp8/encoder/quantize.h | 27 + vp8/encoder/rdopt.c | 3 - vp8/encoder/rdopt.h | 1 + vp8/encoder/tokenize.c | 492 +++++++++++++++++- vp8/encoder/tokenize.h | 1 + 45 files changed, 4554 insertions(+), 166 deletions(-) diff 
--git a/configure b/configure index 772cded33..45f1f1abb 100755 --- a/configure +++ b/configure @@ -217,6 +217,7 @@ HAVE_LIST=" EXPERIMENT_LIST=" extend_qrange segmentation + t8x8 csm " CONFIG_LIST=" diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h index 48929a869..6482b2a89 100644 --- a/vp8/common/blockd.h +++ b/vp8/common/blockd.h @@ -20,6 +20,7 @@ void vpx_log(const char *format, ...); #include "treecoder.h" #include "subpixel.h" #include "vpx_ports/mem.h" +#include "common.h" #define TRUE 1 #define FALSE 0 @@ -65,6 +66,10 @@ extern const unsigned char vp8_block2above[25]; #define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \ Dest = ((A)!=0) + ((B)!=0); +#if CONFIG_T8X8 +#define VP8_COMBINEENTROPYCONTEXTS_8x8( Dest, A1, B1, A2, B2) \ + Dest = ((A1)!=0 || (A2)!=0) + ((B1)!=0 || (B2)!=0); +#endif typedef enum { @@ -89,15 +94,15 @@ typedef enum MB_MODE_COUNT } MB_PREDICTION_MODE; -/* Macroblock level features */ +// Macroblock level features typedef enum { - MB_LVL_ALT_Q = 0, /* Use alternate Quantizer .... */ - MB_LVL_ALT_LF = 1, /* Use alternate loop filter value... */ - MB_LVL_MAX = 2 /* Number of MB level features supported */ - + MB_LVL_ALT_Q = 0, // Use alternate Quantizer .... + MB_LVL_ALT_LF = 1, // Use alternate loop filter value... 
+ MB_LVL_MAX = 2, // Number of MB level features supported } MB_LVL_FEATURES; + /* Segment Feature Masks */ #define SEGMENT_ALTQ 0x01 #define SEGMENT_ALT_LF 0x02 @@ -270,6 +275,7 @@ typedef struct void *current_bc; + int corrupted; #if CONFIG_RUNTIME_CPU_DETECT diff --git a/vp8/common/coefupdateprobs.h b/vp8/common/coefupdateprobs.h index 9e194dc9a..029941440 100644 --- a/vp8/common/coefupdateprobs.h +++ b/vp8/common/coefupdateprobs.h @@ -183,3 +183,176 @@ const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTE }, }, }; + +const vp8_prob vp8_coef_update_probs_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] = +{ + { + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 229, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {219, 234, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {239, 204, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 209, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + {239, 219, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 204, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + {229, 209, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 193, 209, 255, 255, 255, 255, 255, 255, 255, 255, }, + {229, 198, 239, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 204, 204, 255, 255, 255, 255, 255, 255, 255, 255, }, + {219, 198, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 198, 204, 255, 255, 255, 255, 255, 255, 255, 255, }, + {209, 193, 234, 249, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 249, 255, 255, 255, 255, 255, 255, 255, 
255, }, + }, + { + {255, 214, 214, 255, 255, 255, 255, 255, 255, 255, 255, }, + {173, 193, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + {249, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + }, + { + { + {255, 255, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + {224, 224, 219, 255, 255, 255, 255, 255, 255, 255, 255, }, + {229, 239, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 234, 224, 255, 255, 255, 255, 255, 255, 255, 255, }, + {224, 234, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + {229, 255, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + {224, 255, 239, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + }, + { + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {224, 219, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + {234, 183, 214, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 193, 229, 255, 249, 255, 255, 255, 
255, 255, 255, }, + {229, 214, 234, 249, 255, 255, 255, 255, 255, 255, 255, }, + {255, 249, 255, 255, 249, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 198, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + {229, 219, 249, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 249, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 193, 224, 255, 255, 255, 255, 255, 255, 255, 255, }, + {229, 204, 234, 249, 249, 255, 255, 255, 255, 255, 255, }, + {255, 249, 249, 255, 244, 249, 255, 255, 255, 255, 255, }, + }, + { + {255, 178, 224, 255, 249, 255, 255, 255, 255, 255, 255, }, + {234, 224, 234, 249, 255, 255, 255, 255, 255, 255, 255, }, + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 183, 229, 255, 249, 255, 255, 255, 255, 255, 255, }, + {234, 219, 234, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 249, 249, 255, 249, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 193, 224, 249, 255, 244, 255, 255, 255, 255, 255, }, + {219, 224, 229, 255, 255, 249, 255, 255, 255, 255, 255, }, + {255, 255, 255, 249, 249, 255, 255, 255, 255, 255, 255, }, + }, + { + {255, 193, 229, 255, 255, 255, 255, 255, 255, 255, 255, }, + {224, 224, 239, 255, 255, 255, 255, 255, 255, 255, 255, }, + {249, 244, 249, 255, 255, 255, 255, 255, 255, 255, 255, }, + }, + }, + { + { + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {249, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, + {255, 239, 234, 244, 239, 244, 249, 255, 255, 255, 255, }, + }, + { + {255, 249, 239, 239, 244, 255, 255, 255, 255, 255, 255, }, + {255, 249, 244, 255, 249, 255, 255, 255, 255, 255, 255, }, + {255, 255, 239, 255, 255, 249, 255, 255, 255, 255, 255, }, + }, + { + {255, 244, 239, 239, 244, 255, 255, 255, 255, 255, 255, }, + {255, 234, 239, 234, 249, 255, 255, 255, 255, 255, 255, }, + {255, 255, 229, 239, 234, 249, 244, 255, 255, 255, 255, }, + }, + { + {255, 239, 229, 239, 234, 234, 255, 255, 255, 255, 255, }, + {255, 239, 234, 229, 244, 239, 255, 234, 255, 255, 
255, }, + {255, 229, 209, 229, 239, 234, 244, 229, 255, 249, 255, }, + }, + { + {255, 239, 234, 229, 244, 249, 255, 249, 255, 255, 255, }, + {255, 234, 229, 244, 234, 249, 255, 249, 255, 255, 255, }, + {255, 229, 239, 229, 249, 255, 255, 244, 255, 255, 255, }, + }, + { + {255, 239, 234, 239, 234, 239, 255, 249, 255, 255, 255, }, + {255, 229, 234, 239, 239, 239, 255, 244, 255, 255, 255, }, + {255, 229, 234, 239, 239, 244, 255, 255, 255, 255, 255, }, + }, + { + {255, 219, 224, 229, 229, 234, 239, 224, 255, 255, 255, }, + {255, 229, 229, 224, 234, 229, 239, 239, 255, 255, 255, }, + {255, 229, 224, 239, 234, 239, 224, 224, 255, 249, 255, }, + }, + { + {255, 234, 229, 244, 229, 229, 255, 214, 255, 255, 255, }, + {255, 239, 234, 239, 214, 239, 255, 209, 255, 255, 255, }, + {249, 239, 219, 209, 219, 224, 239, 204, 255, 255, 255, }, + }, + }, + +}; diff --git a/vp8/common/common.h b/vp8/common/common.h index 9a93da991..999f79f2f 100644 --- a/vp8/common/common.h +++ b/vp8/common/common.h @@ -13,7 +13,7 @@ #define common_h 1 #include - +#include "vpx_config.h" /* Interface header for common constant data structures and lookup tables */ #include "vpx_mem/vpx_mem.h" @@ -38,5 +38,4 @@ #define vp8_zero_array( Dest, N) vpx_memset( Dest, 0, N * sizeof( *Dest)); - #endif /* common_h */ diff --git a/vp8/common/defaultcoefcounts.c b/vp8/common/defaultcoefcounts.c index b0e2e702a..a0258bd03 100644 --- a/vp8/common/defaultcoefcounts.c +++ b/vp8/common/defaultcoefcounts.c @@ -223,3 +223,180 @@ const unsigned int vp8_default_coef_counts[BLOCK_TYPES] }, }, }; + + +const unsigned int vp8_default_coef_counts_8x8[BLOCK_TYPES] + [COEF_BANDS] + [PREV_COEF_CONTEXTS] + [MAX_ENTROPY_TOKENS] = +{ + + { /* block Type 0 */ + { /* Coeff Band 0 */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 1 */ + { 21041, 13314, 3420, 592, 117, 0, 0, 0, 0, 0, 0, 11783}, + { 48236, 6918, 586, 153, 0, 0, 0, 0, 0, 0, 0, 
23137}, + { 676112, 106685, 24701, 6003, 1426, 429, 165, 0, 0, 0, 0, 28910} + }, + { /* Coeff Band 2 */ + { 660107, 75227, 8451, 1345, 259, 0, 0, 0, 0, 0, 0, 0}, + { 79164, 36835, 6865, 1185, 246, 47, 0, 0, 0, 0, 0, 2575}, + { 19469, 14330, 3070, 579, 94, 6, 0, 0, 0, 0, 0, 44} + }, + { /* Coeff Band 3 */ + { 1978004, 235343, 28485, 3242, 271, 0, 0, 0, 0, 0, 0, 0}, + { 228684, 106736, 21431, 2842, 272, 46, 0, 0, 0, 0, 0, 9266}, + { 32470, 27496, 6852, 1386, 45, 93, 0, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 4 */ + { 1911212, 224613, 49653, 13748, 2541, 568, 48, 0, 0, 0, 0, 0}, + { 196670, 103472, 44473, 11490, 2432, 977, 72, 0, 0, 0, 0, 9447}, + { 37876, 40417, 19142, 6069, 1799, 727, 51, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 5 */ + { 3813399, 437714, 64387, 11312, 695, 219, 0, 0, 0, 0, 0, 0}, + { 438288, 215917, 61905, 10194, 674, 107, 0, 0, 0, 0, 0, 17808}, + { 99139, 93643, 30054, 5758, 802, 171, 0, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 6 */ + { 12259383, 1625505, 234927, 46306, 8417, 1456, 151, 0, 0, 0, 0, 0}, + { 1518161, 734287, 204240, 44228, 9462, 2240, 65, 0, 0, 0, 0, 107630}, + { 292470, 258894, 94925, 25864, 6662, 2055, 170, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 7 */ + { 9791308, 2118949, 169439, 16735, 1122, 0, 0, 0, 0, 0, 0, 0}, + { 1500281, 752410, 123259, 13065, 1168, 47, 0, 0, 0, 0, 0, 707182}, + { 193067, 142638, 31018, 4719, 516, 138, 0, 0, 0, 0, 0, 12439} + } + }, + { /* block Type 1 */ + { /* Coeff Band 0 */ + { 16925, 10553, 852, 16, 63, 87, 47, 0, 0, 0, 0, 31232}, + { 39777, 26839, 6822, 1908, 678, 456, 227, 168, 35, 0, 0, 46825}, + { 17300, 16666, 4168, 1209, 492, 154, 118, 207, 0, 0, 0, 19608} + }, + { /* Coeff Band 1 */ + { 35882, 31722, 4625, 1270, 266, 237, 0, 0, 0, 0, 0, 0}, + { 15426, 13894, 4482, 1305, 281, 43, 0, 0, 0, 0, 0, 18627}, + { 3900, 6552, 3472, 1723, 746, 366, 115, 35, 0, 0, 0, 798} + }, + { /* Coeff Band 2 */ + { 21998, 29132, 3353, 679, 46, 0, 0, 0, 0, 0, 0, 0}, + { 9098, 15767, 3794, 792, 268, 47, 0, 0, 0, 0, 0, 22402}, + { 
4007, 8472, 2844, 687, 217, 0, 0, 0, 0, 0, 0, 2739} + }, + { /* Coeff Band 3 */ + { 0, 31414, 2911, 682, 96, 0, 0, 0, 0, 0, 0, 0}, + { 0, 16515, 4425, 938, 124, 0, 0, 0, 0, 0, 0, 31369}, + { 0, 4833, 2787, 1213, 150, 0, 0, 0, 0, 0, 0, 3744} + }, + { /* Coeff Band 4 */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 5 */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 6 */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52762}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13326} + }, + { /* Coeff Band 7 */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + } + }, + { /* block Type 2 */ + { /* Coeff Band 0 */ + { 4444, 1614, 120, 48, 0, 48, 0, 0, 0, 0, 0, 278}, + { 192436, 103730, 24494, 9845, 4122, 1193, 102, 0, 0, 0, 0, 2577}, + { 3473446, 2308716, 815510, 370374, 167797, 92152, 12073, 86, 0, 0, 0, 6801} + }, + { /* Coeff Band 1 */ + { 2150616, 1136388, 250011, 86888, 31434, 13746, 1243, 0, 0, 0, 0, 0}, + { 1179945, 799802, 266012, 106787, 40809, 16486, 1546, 0, 0, 0, 0, 2673}, + { 465128, 504130, 286989, 146259, 62380, 30192, 2866, 20, 0, 0, 0, 0} + }, + { /* Coeff Band 2 */ + { 2157762, 1177519, 282665, 108499, 43389, 23224, 2597, 34, 0, 0, 0, 0}, + { 1135685, 813705, 278079, 123255, 53935, 29492, 3152, 39, 0, 0, 0, 2978}, + { 391894, 428037, 264216, 144306, 69326, 40281, 5541, 29, 0, 0, 0, 38} + }, + { /* Coeff Band 3 */ + { 6669109, 3468471, 782161, 288484, 115500, 51083, 4943, 41, 0, 0, 0, 0}, + { 3454493, 2361636, 809524, 337663, 141343, 65036, 6361, 0, 0, 0, 0, 8730}, + { 1231825, 1359522, 824686, 420784, 185517, 98731, 10973, 72, 0, 0, 0, 20} + }, + { /* Coeff Band 4 */ + { 7606203, 3452846, 659856, 191703, 49335, 14336, 450, 0, 0, 0, 0, 0}, + { 3806506, 2379332, 691697, 
224938, 61966, 18324, 766, 0, 0, 0, 0, 8193}, + { 1270110, 1283728, 628775, 243378, 72617, 24897, 1087, 0, 0, 0, 0, 0} + }, + { /* Coeff Band 5 */ + { 15314169, 7436809, 1579928, 515790, 167453, 58305, 3502, 19, 0, 0, 0, 0}, + { 7021286, 4667922, 1545706, 574463, 191793, 68748, 4048, 1, 0, 0, 0, 17222}, + { 2011989, 2145878, 1185336, 534879, 195719, 79103, 5343, 4, 0, 0, 0, 37} + }, + { /* Coeff Band 6 */ + { 63458382, 25384462, 4208045, 1091050, 299011, 95242, 5238, 33, 0, 0, 0, 0}, + { 25638401, 14694085, 3945978, 1195420, 344813, 117355, 6703, 0, 0, 0, 0, 216811}, + { 5988177, 5824044, 2754413, 1077350, 370739, 139710, 9693, 38, 0, 0, 0, 1835} + }, + { /* Coeff Band 7 */ + { 74998348, 29342158, 2955001, 452912, 69631, 9516, 37, 0, 0, 0, 0, 0}, + { 24762356, 13281085, 2409883, 436787, 68948, 10658, 36, 0, 0, 0, 0, 6614989}, + { 3882867, 3224489, 1052289, 252890, 46967, 8548, 154, 0, 0, 0, 0, 194354} + } + }, + { /* block Type 3 */ + { /* Coeff Band 0 */ + { 10583, 12059, 3155, 1041, 248, 175, 24, 2, 0, 0, 0, 5717}, + { 42461, 41782, 13553, 4966, 1352, 855, 89, 0, 0, 0, 0, 15000}, + { 4691125, 5045589, 2673566, 1089317, 378161, 160268, 18252, 813, 69, 13, 0, 49} + }, + { /* Coeff Band 1 */ + { 1535203, 1685686, 924565, 390329, 141709, 60523, 5983, 171, 0, 0, 0, 0}, + { 1594021, 1793276, 1016078, 441332, 164159, 70843, 8098, 311, 0, 0, 0, 11312}, + { 1225223, 1430184, 888492, 460713, 203286, 115149, 22061, 804, 7, 0, 0, 0} + }, + { /* Coeff Band 2 */ + { 1522386, 1590366, 799910, 303691, 96625, 37608, 3637, 180, 33, 11, 0, 0}, + { 1682184, 1793869, 913649, 353520, 113674, 46309, 4736, 221, 18, 3, 0, 963}, + { 1574580, 1740474, 954392, 417994, 151400, 67091, 8000, 536, 73, 10, 0, 63} + }, + { /* Coeff Band 3 */ + { 4963672, 5197790, 2585383, 982161, 313333, 118498, 16014, 536, 62, 0, 0, 0}, + { 5223913, 5569803, 2845858, 1107384, 364949, 147841, 18296, 658, 11, 11, 0, 1866}, + { 4042207, 4548894, 2608767, 1154993, 446290, 221295, 41054, 2438, 124, 20, 0, 0} + }, + { 
/* Coeff Band 4 */ + { 3857216, 4431325, 2670447, 1330169, 553301, 286825, 46763, 1917, 0, 0, 0, 0}, + { 4226215, 4963701, 3046198, 1523923, 644670, 355519, 58792, 2525, 0, 0, 0, 1298}, + { 3831873, 4580350, 3018580, 1660048, 797298, 502983, 123906, 7172, 16, 0, 0, 0} + }, + { /* Coeff Band 5 */ + { 8524543, 9285149, 4979435, 2039330, 683458, 266032, 22628, 270, 0, 0, 0, 0}, + { 9432163, 10428088, 5715661, 2385738, 838389, 326264, 29981, 361, 0, 0, 0, 884}, + { 9039066, 10368964, 6136765, 2862030, 1098269, 511668, 63105, 945, 14, 0, 0, 0} + }, + { /* Coeff Band 6 */ + { 33222872, 34748297, 17701695, 7214933, 2602336, 1191859, 187873, 12667, 390, 3, 0, 0}, + { 34765051, 37140719, 19525578, 8268934, 3085012, 1473864, 246743, 15258, 736, 3, 0, 8403}, + { 28591289, 32252393, 19037068, 9213729, 4020653, 2372354, 586420, 67428, 3920, 92, 7, 3} + }, + { /* Coeff Band 7 */ + { 68604786, 60777665, 19712887, 5656955, 1520443, 507166, 51829, 2466, 10, 0, 0, 0}, + { 55447403, 51682540, 19008774, 5928582, 1706884, 595531, 65998, 3661, 101, 0, 0, 8468343}, + { 28321970, 29149398, 13565882, 5258675, 1868588, 898041, 192023, 21497, 672, 17, 0, 1884921} + } + } + }; diff --git a/vp8/common/defaultcoefcounts.h b/vp8/common/defaultcoefcounts.h index 7a1e28b7b..293e74269 100644 --- a/vp8/common/defaultcoefcounts.h +++ b/vp8/common/defaultcoefcounts.h @@ -18,4 +18,9 @@ extern const unsigned int vp8_default_coef_counts[BLOCK_TYPES] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; -#endif //__DEFAULTCOEFCOUNTS_H +extern const unsigned int vp8_default_coef_counts_8x8[BLOCK_TYPES] + [COEF_BANDS] + [PREV_COEF_CONTEXTS] + [MAX_ENTROPY_TOKENS]; + +#endif \ No newline at end of file diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c index 5044c25b9..58328ceb5 100644 --- a/vp8/common/entropy.c +++ b/vp8/common/entropy.c @@ -59,6 +59,24 @@ DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) = 9, 12, 13, 10, 7, 11, 14, 15, }; +#if CONFIG_T8X8 +DECLARE_ALIGNED(64, cuchar, 
vp8_coef_bands_8x8[64]) = { 0, 1, 2, 3, 5, 4, 4, 5, + 5, 3, 6, 3, 5, 4, 6, 6, + 6, 5, 5, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7 +}; +DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]) = +{ + 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, +}; +#endif DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) = { @@ -69,6 +87,9 @@ DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) = }; DECLARE_ALIGNED(16, short, vp8_default_zig_zag_mask[16]); +#if CONFIG_T8X8 +DECLARE_ALIGNED(64, short, vp8_default_zig_zag_mask_8x8[64]);//int64_t +#endif const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6}; @@ -107,12 +128,16 @@ static vp8_tree_index cat1[2], cat2[4], cat3[6], cat4[8], cat5[10], cat6[22]; void vp8_init_scan_order_mask() { int i; - for (i = 0; i < 16; i++) { vp8_default_zig_zag_mask[vp8_default_zig_zag1d[i]] = 1 << i; } - +#if CONFIG_T8X8 + for (i = 0; i < 64; i++) + { + vp8_default_zig_zag_mask_8x8[vp8_default_zig_zag1d_8x8[i]] = 1 << i; + } +#endif } static void init_bit_tree(vp8_tree_index *p, int n) @@ -158,7 +183,6 @@ vp8_extra_bit_struct vp8_extra_bits[12] = void vp8_default_coef_probs(VP8_COMMON *pc) { int h = 0; - do { int i = 0; @@ -183,8 +207,32 @@ void vp8_default_coef_probs(VP8_COMMON *pc) while (++i < COEF_BANDS); } while (++h < BLOCK_TYPES); -} +#if CONFIG_T8X8 + h = 0; + do + { + int i = 0; + + do + { + int k = 0; + + do + { + unsigned int branch_ct [ENTROPY_NODES] [2]; + vp8_tree_probs_from_distribution( + MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, + pc->fc.coef_probs_8x8 [h][i][k], branch_ct, vp8_default_coef_counts_8x8 [h][i][k], + 256, 1); + } + while (++k < PREV_COEF_CONTEXTS); + } + while (++i < 
COEF_BANDS); + } + while (++h < BLOCK_TYPES); +#endif +} void vp8_coef_tree_initialize() { diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h index 3c25453a7..786452712 100644 --- a/vp8/common/entropy.h +++ b/vp8/common/entropy.h @@ -14,7 +14,7 @@ #include "treecoder.h" #include "blockd.h" - +#include "common.h" /* Coefficient token alphabet */ #define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */ @@ -64,6 +64,9 @@ extern vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */ #define COEF_BANDS 8 extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]); +#if CONFIG_T8X8 +extern DECLARE_ALIGNED(64, const unsigned char, vp8_coef_bands_8x8[64]); +#endif /* Inside dimension is 3-valued measure of nearby complexity, that is, the extent to which nearby coefficients are nonzero. For the first @@ -87,15 +90,20 @@ extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]); extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]); extern const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; - +#if CONFIG_T8X8 +extern const vp8_prob vp8_coef_update_probs_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#endif struct VP8Common; void vp8_default_coef_probs(struct VP8Common *); - extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]); extern DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]); extern short vp8_default_zig_zag_mask[16]; +#if CONFIG_T8X8 +extern DECLARE_ALIGNED(64, const int, vp8_default_zig_zag1d_8x8[64]); +extern short vp8_default_zig_zag_mask_8x8[64];//int64_t +#endif extern const int vp8_mb_feature_data_bits[MB_LVL_MAX]; - void vp8_coef_tree_initialize(void); + #endif diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c index 47b13c755..1acc0157b 100644 --- a/vp8/common/generic/systemdependent.c +++ b/vp8/common/generic/systemdependent.c @@ -75,7 +75,13 @@ void 
vp8_machine_specific_config(VP8_COMMON *ctx) rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_c; rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_c; rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_c; - +#if CONFIG_T8X8 + rtcd->idct.idct8 = vp8_short_idct8x8_c; + rtcd->idct.idct8_1 = vp8_short_idct8x8_1_c; + rtcd->idct.idct1_scalar_add_8x8 = vp8_dc_only_idct_add_8x8_c; + rtcd->idct.ihaar2 = vp8_short_ihaar2x2_c; + rtcd->idct.ihaar2_1 = vp8_short_ihaar2x2_1_c; +#endif rtcd->recon.copy16x16 = vp8_copy_mem16x16_c; rtcd->recon.copy8x8 = vp8_copy_mem8x8_c; rtcd->recon.copy8x4 = vp8_copy_mem8x4_c; diff --git a/vp8/common/idct.h b/vp8/common/idct.h index f5fd94dfd..aa1fb5e42 100644 --- a/vp8/common/idct.h +++ b/vp8/common/idct.h @@ -31,6 +31,34 @@ #include "arm/idct_arm.h" #endif +#if CONFIG_T8X8 +#ifndef vp8_idct_idct8 +#define vp8_idct_idct8 vp8_short_idct8x8_c +#endif +extern prototype_idct(vp8_idct_idct8); + +#ifndef vp8_idct_idct8_1 +#define vp8_idct_idct8_1 vp8_short_idct8x8_1_c +#endif +extern prototype_idct(vp8_idct_idct8_1); + +#ifndef vp8_idct_ihaar2 +#define vp8_idct_ihaar2 vp8_short_ihaar2x2_c +#endif +extern prototype_idct(vp8_idct_ihaar2); + +#ifndef vp8_idct_ihaar2_1 +#define vp8_idct_ihaar2_1 vp8_short_ihaar2x2_1_c +#endif +extern prototype_idct(vp8_idct_ihaar2_1); + +#ifndef vp8_idct_idct1_scalar_add_8x8 +#define vp8_idct_idct1_scalar_add_8x8 vp8_dc_only_idct_add_8x8_c +#endif +extern prototype_idct_scalar_add(vp8_idct_idct1_scalar_add_8x8); + +#endif + #ifndef vp8_idct_idct1 #define vp8_idct_idct1 vp8_short_idct4x4llm_1_c #endif @@ -46,7 +74,6 @@ extern prototype_idct(vp8_idct_idct16); #endif extern prototype_idct_scalar_add(vp8_idct_idct1_scalar_add); - #ifndef vp8_idct_iwalsh1 #define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_c #endif @@ -69,6 +96,14 @@ typedef struct vp8_second_order_fn_t iwalsh1; vp8_second_order_fn_t iwalsh16; + +#if CONFIG_T8X8 + vp8_idct_fn_t idct8; + vp8_idct_fn_t idct8_1; + vp8_idct_scalar_add_fn_t idct1_scalar_add_8x8; + 
vp8_idct_fn_t ihaar2; + vp8_idct_fn_t ihaar2_1; +#endif } vp8_idct_rtcd_vtable_t; #if CONFIG_RUNTIME_CPU_DETECT diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c index c65d35adc..9d3e30413 100644 --- a/vp8/common/idctllm.c +++ b/vp8/common/idctllm.c @@ -24,9 +24,13 @@ **************************************************************************/ #include "vpx_ports/config.h" + +#include + static const int cospi8sqrt2minus1 = 20091; static const int sinpi8sqrt2 = 35468; static const int rounding = 0; + void vp8_short_idct4x4llm_c(short *input, short *output, int pitch) { int i; @@ -199,6 +203,7 @@ void vp8_short_inv_walsh4x4_c(short *input, short *output) ip += 4; op += 4; } + //printf("here2\n"); } void vp8_short_inv_walsh4x4_1_c(short *input, short *output) @@ -208,9 +213,9 @@ void vp8_short_inv_walsh4x4_1_c(short *input, short *output) short *op = output; #if !CONFIG_EXTEND_QRANGE - a1 = (input[0] + 3 )>> 3; + a1 = ((input[0] + 3) >> 3); #else - a1 = (input[0] + 1 )>> 2; + a1 = ((input[0] + 1) >> 2); #endif for (i = 0; i < 4; i++) @@ -222,3 +227,312 @@ void vp8_short_inv_walsh4x4_1_c(short *input, short *output) op += 4; } } + +#if CONFIG_T8X8 + +#define FAST_IDCT_8X8 + +void vp8_short_idct8x8_1_c(short *input, short *output, int pitch) +{ + int i, b; + int a1; + short *op = output; + short *orig_op = output; + int shortpitch = pitch >> 1; + a1 = ((input[0] + 4) >> 3); + for (b = 0; b < 4; b++) + { + for (i = 0; i < 4; i++) + { + op[0] = a1; + op[1] = a1; + op[2] = a1; + op[3] = a1; + op += shortpitch; + } + op = orig_op + (b+1)%2*4 +(b+1)/2*4*shortpitch; + } +} + +void vp8_dc_only_idct_add_8x8_c(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride) +{ + int a1 = ((input_dc + 4) >> 3); + int r, c, b; + unsigned char *orig_pred = pred_ptr; + unsigned char *orig_dst = dst_ptr; + for (b = 0; b < 4; b++) + { + for (r = 0; r < 4; r++) + { + for (c = 0; c < 4; c++) + { + int a = a1 + pred_ptr[c] ; + + if (a < 0) + a = 0; + + if (a 
> 255) + a = 255; + + dst_ptr[c] = (unsigned char) a ; + } + + dst_ptr += stride; + pred_ptr += pitch; + } + dst_ptr = orig_dst + (b+1)%2*4 + (b+1)/2*4*stride; + pred_ptr = orig_pred + (b+1)%2*4 + (b+1)/2*4*pitch; + } +} + +#ifdef FAST_IDCT_8X8 + +#define W1 2841 /* 2048*sqrt(2)*cos(1*pi/16) */ +#define W2 2676 /* 2048*sqrt(2)*cos(2*pi/16) */ +#define W3 2408 /* 2048*sqrt(2)*cos(3*pi/16) */ +#define W5 1609 /* 2048*sqrt(2)*cos(5*pi/16) */ +#define W6 1108 /* 2048*sqrt(2)*cos(6*pi/16) */ +#define W7 565 /* 2048*sqrt(2)*cos(7*pi/16) */ + +/* row (horizontal) IDCT + * + * 7 pi 1 dst[k] = sum c[l] * src[l] * cos( -- * + * ( k + - ) * l ) l=0 8 2 + * + * where: c[0] = 128 c[1..7] = 128*sqrt(2) */ + +static void idctrow (int *blk) +{ + int x0, x1, x2, x3, x4, x5, x6, x7, x8; + + /* shortcut */ + if (!((x1 = blk[4] << 11) | (x2 = blk[6]) | (x3 = blk[2]) | + (x4 = blk[1]) | (x5 = blk[7]) | (x6 = blk[5]) | (x7 = blk[3]))) + { + blk[0] = blk[1] = blk[2] = blk[3] = blk[4] = blk[5] = blk[6] = blk[7] = blk[0] << 3; + return; + } + x0 = (blk[0] << 11) + 128; /* for proper rounding in the fourth stage */ + + /* first stage */ + x8 = W7 * (x4 + x5); + x4 = x8 + (W1 - W7) * x4; + x5 = x8 - (W1 + W7) * x5; + x8 = W3 * (x6 + x7); + x6 = x8 - (W3 - W5) * x6; + x7 = x8 - (W3 + W5) * x7; + + /* second stage */ + x8 = x0 + x1; + x0 -= x1; + x1 = W6 * (x3 + x2); + x2 = x1 - (W2 + W6) * x2; + x3 = x1 + (W2 - W6) * x3; + x1 = x4 + x6; + x4 -= x6; + x6 = x5 + x7; + x5 -= x7; + + /* third stage */ + x7 = x8 + x3; + x8 -= x3; + x3 = x0 + x2; + x0 -= x2; + x2 = (181 * (x4 + x5) + 128) >> 8; + x4 = (181 * (x4 - x5) + 128) >> 8; + + /* fourth stage */ + blk[0] = (x7 + x1) >> 8; + blk[1] = (x3 + x2) >> 8; + blk[2] = (x0 + x4) >> 8; + blk[3] = (x8 + x6) >> 8; + blk[4] = (x8 - x6) >> 8; + blk[5] = (x0 - x4) >> 8; + blk[6] = (x3 - x2) >> 8; + blk[7] = (x7 - x1) >> 8; +} + +/* column (vertical) IDCT + * + * 7 pi 1 dst[8*k] = sum c[l] * src[8*l] * + * cos( -- * ( k + - ) * l ) l=0 8 2 + * + * where: 
c[0] = 1/1024 c[1..7] = (1/1024)*sqrt(2) */ +static void idctcol (int *blk) +{ + int x0, x1, x2, x3, x4, x5, x6, x7, x8; + + /* shortcut */ + if (!((x1 = (blk[8 * 4] << 8)) | (x2 = blk[8 * 6]) | (x3 = blk[8 * 2]) | + (x4 = blk[8 * 1]) | (x5 = blk[8 * 7]) | (x6 = blk[8 * 5]) | (x7 = blk[8 * 3]))) + { + blk[8 * 0] = blk[8 * 1] = blk[8 * 2] = blk[8 * 3] = blk[8 * 4] = blk[8 * 5] = blk[8 * 6] = blk[8 * 7] = + ((blk[8 * 0] + 32) >> 6); + return; + } + x0 = (blk[8 * 0] << 8) + 8192; + + /* first stage */ + x8 = W7 * (x4 + x5) + 4; + x4 = (x8 + (W1 - W7) * x4) >> 3; + x5 = (x8 - (W1 + W7) * x5) >> 3; + x8 = W3 * (x6 + x7) + 4; + x6 = (x8 - (W3 - W5) * x6) >> 3; + x7 = (x8 - (W3 + W5) * x7) >> 3; + + /* second stage */ + x8 = x0 + x1; + x0 -= x1; + x1 = W6 * (x3 + x2) + 4; + x2 = (x1 - (W2 + W6) * x2) >> 3; + x3 = (x1 + (W2 - W6) * x3) >> 3; + x1 = x4 + x6; + x4 -= x6; + x6 = x5 + x7; + x5 -= x7; + + /* third stage */ + x7 = x8 + x3; + x8 -= x3; + x3 = x0 + x2; + x0 -= x2; + x2 = (181 * (x4 + x5) + 128) >> 8; + x4 = (181 * (x4 - x5) + 128) >> 8; + + /* fourth stage */ + blk[8 * 0] = (x7 + x1) >> 14; + blk[8 * 1] = (x3 + x2) >> 14; + blk[8 * 2] = (x0 + x4) >> 14; + blk[8 * 3] = (x8 + x6) >> 14; + blk[8 * 4] = (x8 - x6) >> 14; + blk[8 * 5] = (x0 - x4) >> 14; + blk[8 * 6] = (x3 - x2) >> 14; + blk[8 * 7] = (x7 - x1) >> 14; +} + +#define TX_DIM 8 +void vp8_short_idct8x8_c(short *coefs, short *block, int pitch) +// an approximate 8x8 dct implementation, but not used +{ + int X[TX_DIM*TX_DIM]; + int i,j; + int shortpitch = pitch >> 1; + + for (i = 0; i < TX_DIM; i++) + { + for (j = 0; j < TX_DIM; j++) + { + X[i * TX_DIM + j] = (int)coefs[i * TX_DIM + j]; + } + } + for (i = 0; i < 8; i++) + idctrow (X + 8 * i); + + for (i = 0; i < 8; i++) + idctcol (X + i); + + for (i = 0; i < TX_DIM; i++) + { + for (j = 0; j < TX_DIM; j++) + { + block[i*shortpitch+j] = X[i * TX_DIM + j]>>1; + } + } +} + +#else + +/* This is really for testing */ +void vp8_short_idct8x8_c(short *input, short 
*output, int pitch) +{ + int X[8][8]; + double C[8][8]={{0.0}}, Ct[8][8]={{0.0}}, temp[8][8]={{0.0}}; + int i,j,k; + double temp1=0.0; + double pi = atan( 1.0 ) * 4.0; + //static int count=0; + + int shortpitch = pitch >> 1; + + for (i = 0; i < 8; i++) + { + for (j = 0; j < 8; j++) + { + X[i][j] = input[i * 8 + j]; + } + } + + // TODO: DCT matrix should be calculated once for all + for ( j = 0 ; j < 8 ; j++ ) { + C[ 0 ][ j ] = 1.0 / sqrt( (double) 8 ); + Ct[ j ][ 0 ] = C[ 0 ][ j ]; + } + for ( i = 1 ; i < 8 ; i++ ) { + for ( j = 0 ; j < 8 ; j++ ) { + C[ i ][ j ] = sqrt( 2.0 / 8 ) * + cos( pi * ( 2 * j + 1 ) * i / ( 2.0 * 8 ) ); + Ct[ j ][ i ] = C[ i ][ j ]; + } + } + /* MatrixMultiply( temp, input, C ); */ + for ( i = 0 ; i < 8 ; i++ ) { + for ( j = 0 ; j < 8 ; j++ ) { + temp[ i ][ j ] = 0.0; + for ( k = 0 ; k < 8 ; k++ ) + temp[ i ][ j ] += X[ i ][ k ] * C[ k ][ j ]; + } + } + + /* MatrixMultiply( output, Ct, temp ); */ + for ( i = 0 ; i < 8 ; i++ ) { + for ( j = 0 ; j < 8 ; j++ ) { + temp1 = 0.0; + for ( k = 0 ; k < 8 ; k++ ) + temp1 += Ct[ i ][ k ] * temp[ k ][ j ]; + X[ i ][ j ] = floor( temp1/ 2.0 + 0.5); + } + } + + for (i = 0; i < 8; i++) + { + for (j = 0; j < 8; j++) + { + output[i*shortpitch+j] = X[i][j]; + } + } +} +#endif + +void vp8_short_ihaar2x2_c(short *input, short *output, int pitch) +{ + int i, x; + short *ip = input; //0,1, 4, 8 + short *op = output; + for (i = 0; i < 16; i++) + { + op[i] = 0; + } + + x = (ip[0] + ip[1] + ip[4] + ip[8]); + op[0] = (x>=0?x+1:x-1)>>2; + x = (ip[0] - ip[1] + ip[4] - ip[8]); + op[1] = (x>=0?x+1:x-1)>>2; + x = (ip[0] + ip[1] - ip[4] - ip[8]); + op[4] = (x>=0?x+1:x-1)>>2; + x = (ip[0] - ip[1] - ip[4] + ip[8]); + op[8] = (x>=0?x+1:x-1)>>2; +} + +void vp8_short_ihaar2x2_1_c(short *input, short *output, int pitch) +{ + int a1; + short *ip = input; + short *op = output; + a1 = ((ip[0]>=0?ip[0]+1:ip[0]-1) >> 2); + op[0] = a1; + op[2] = a1; + op[8] = a1; + op[10] = a1; + +} +#endif diff --git a/vp8/common/invtrans.c 
b/vp8/common/invtrans.c index 81a3f2d89..d361b654a 100644 --- a/vp8/common/invtrans.c +++ b/vp8/common/invtrans.c @@ -24,13 +24,24 @@ static void recon_dcblock(MACROBLOCKD *x) } } +#if CONFIG_T8X8 +static void recon_dcblock_8x8(MACROBLOCKD *x) +{ + BLOCKD *b = &x->block[24]; //for coeff 0, 2, 8, 10 + x->block[0].dqcoeff[0] = b->diff[0]; + x->block[4].dqcoeff[0] = b->diff[1]; + x->block[8].dqcoeff[0] = b->diff[4]; + x->block[12].dqcoeff[0] = b->diff[8]; + +} +#endif void vp8_inverse_transform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch) { - if (b->eob > 1) - IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->diff, pitch); - else + if (b->eob <= 1) IDCT_INVOKE(rtcd, idct1)(b->dqcoeff, b->diff, pitch); + else + IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->diff, pitch); } @@ -86,3 +97,77 @@ void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x } } + +#if CONFIG_T8X8 +void vp8_inverse_transform_b_8x8(const vp8_idct_rtcd_vtable_t *rtcd, short *input_dqcoeff, short *output_coeff, int pitch)//pay attention to use when 8x8 +{ + // int b,i; + //if (b->eob > 1) + IDCT_INVOKE(rtcd, idct8)(input_dqcoeff, output_coeff, pitch); + //else + //IDCT_INVOKE(rtcd, idct8_1)(b->dqcoeff, b->diff, pitch);//pitch + +} + + +void vp8_inverse_transform_mby_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x) +{ + int i; + + // do 2nd order transform on the dc block + IDCT_INVOKE(rtcd, ihaar2)(x->block[24].dqcoeff, x->block[24].diff, 8); + + recon_dcblock_8x8(x); //need to change for 8x8 + for (i = 0; i < 9; i += 8) + { + vp8_inverse_transform_b_8x8(rtcd, &x->block[i].dqcoeff[0], &x->block[i].diff[0], 32); + } + for (i = 2; i < 11; i += 8) + { + vp8_inverse_transform_b_8x8(rtcd, &x->block[i+2].dqcoeff[0], &x->block[i].diff[0], 32); + } + +} +void vp8_inverse_transform_mbuv_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x) +{ + int i; + + for (i = 16; i < 24; i += 4) + { + vp8_inverse_transform_b_8x8(rtcd, &x->block[i].dqcoeff[0], &x->block[i].diff[0], 16); + 
} + +} + + +void vp8_inverse_transform_mb_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x) +{ + int i; + + if (x->mode_info_context->mbmi.mode != B_PRED && + x->mode_info_context->mbmi.mode != SPLITMV) + { + // do 2nd order transform on the dc block + + IDCT_INVOKE(rtcd, ihaar2)(&x->block[24].dqcoeff[0], x->block[24].diff, 8);//dqcoeff[0] + recon_dcblock_8x8(x); //need to change for 8x8 + + } + + for (i = 0; i < 9; i += 8) + { + vp8_inverse_transform_b_8x8(rtcd, &x->block[i].dqcoeff[0], &x->block[i].diff[0], 32); + } + for (i = 2; i < 11; i += 8) + { + vp8_inverse_transform_b_8x8(rtcd, &x->block[i+2].dqcoeff[0], &x->block[i].diff[0], 32); + } + + + for (i = 16; i < 24; i += 4) + { + vp8_inverse_transform_b_8x8(rtcd, &x->block[i].dqcoeff[0], &x->block[i].diff[0], 16); + } + +} +#endif diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h index b3ffb7073..1466a5844 100644 --- a/vp8/common/invtrans.h +++ b/vp8/common/invtrans.h @@ -20,4 +20,11 @@ extern void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBL extern void vp8_inverse_transform_mby(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); extern void vp8_inverse_transform_mbuv(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); +#if CONFIG_T8X8 +extern void vp8_inverse_transform_b_8x8(const vp8_idct_rtcd_vtable_t *rtcd, short *input_dqcoeff, short *output_coeff, int pitch); +extern void vp8_inverse_transform_mb_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); +extern void vp8_inverse_transform_mby_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); +extern void vp8_inverse_transform_mbuv_8x8(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x); +#endif + #endif diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h index 545798ac7..b9ef26576 100644 --- a/vp8/common/onyx.h +++ b/vp8/common/onyx.h @@ -126,6 +126,7 @@ extern "C" //(5)=Two Pass - Second Pass Best. 
The encoder uses the statistics that were generated in the first // encoding pass to create the compressed output using the highest possible quality, and taking a // longer amount of time to encode.. ( speed setting ignored ) + int Mode; // // Key Framing Operations diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h index 0615262e2..246fa116d 100644 --- a/vp8/common/onyxc_int.h +++ b/vp8/common/onyxc_int.h @@ -44,6 +44,9 @@ typedef struct frame_contexts vp8_prob uv_mode_prob [VP8_UV_MODES-1]; vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1]; vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#if CONFIG_T8X8 + vp8_prob coef_probs_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; +#endif MV_CONTEXT mvc[2]; MV_CONTEXT pre_mvc[2]; /* not to caculate the mvcost for the frame if mvc doesn't change. */ } FRAME_CONTEXT; diff --git a/vp8/common/recon.c b/vp8/common/recon.c index d72d6e410..c82639645 100644 --- a/vp8/common/recon.c +++ b/vp8/common/recon.c @@ -12,7 +12,7 @@ #include "vpx_ports/config.h" #include "recon.h" #include "blockd.h" - +#include void vp8_recon_b_c ( unsigned char *pred_ptr, @@ -133,6 +133,7 @@ void vp8_recon_mby_c(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x) RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); } #endif + } void vp8_recon_mb_c(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x) @@ -171,6 +172,7 @@ void vp8_recon_mb_c(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x) RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); } + for (i = 16; i < 24; i += 2) { BLOCKD *b = &x->block[i]; @@ -178,4 +180,5 @@ void vp8_recon_mb_c(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x) RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); } #endif + } diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c index 3b0405ca1..fb1e90939 100644 --- 
a/vp8/common/reconinter.c +++ b/vp8/common/reconinter.c @@ -464,7 +464,3 @@ void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel) } } } - - - - diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c index fe5a427cd..815112ed3 100644 --- a/vp8/decoder/decodemv.c +++ b/vp8/decoder/decodemv.c @@ -18,6 +18,8 @@ #if CONFIG_DEBUG #include #endif +extern int frame_count; + static int vp8_read_bmode(vp8_reader *bc, const vp8_prob *p) { const int i = vp8_treed_read(bc, vp8_bmode_tree, p); @@ -60,6 +62,7 @@ static void vp8_read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x else mi->segment_id = (unsigned char)(vp8_read(r, x->mb_segment_tree_probs[1])); } + //printf("vp8_read_mb_features Segment = %d of frame %d\n", mi->segment_id, frame_count); } static void vp8_kfread_modes(VP8D_COMP *pbi, MODE_INFO *m, int mb_row, int mb_col) @@ -77,6 +80,7 @@ static void vp8_kfread_modes(VP8D_COMP *pbi, MODE_INFO *m, int mb_row, int mb_co if (pbi->mb.update_mb_segmentation_map) vp8_read_mb_features(bc, &m->mbmi, &pbi->mb); + //printf("vp8_kfread_modes segment = %d\n", m->mbmi.segment_id); /* Read the macroblock coeff skip flag if this feature is in use, else default to 0 */ if (pbi->common.mb_no_coeff_skip) @@ -312,12 +316,14 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, { mbmi->segment_id = pbi->segmentation_map[index]; mbmi->segment_flag = 0; + //printf("vp8_read_mb_modes_mv Temporal Update if %d\n", mbmi->segment_id); } else { vp8_read_mb_features(bc, &mi->mbmi, &pbi->mb); mbmi->segment_flag = 1; pbi->segmentation_map[index] = mbmi->segment_id; + //printf("vp8_read_mb_modes_mv Temporal Update else %d\n", mbmi->segment_id); } } @@ -325,6 +331,7 @@ static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, { vp8_read_mb_features(bc, &mi->mbmi, &pbi->mb); pbi->segmentation_map[index] = mbmi->segment_id; + //printf("vp8_read_mb_modes_mv Not Temporal Update %d\n", mbmi->segment_id); } index++; #else @@ -550,4 +557,3 
@@ void vp8_decode_mode_mvs(VP8D_COMP *pbi) mi++; /* skip left predictor each row */ } } - diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index 4d02fdec4..09fb6d0df 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -40,6 +40,10 @@ #include #include +#ifdef DEC_DEBUG +int dec_debug = 0; +#endif + void vp8cx_init_de_quantizer(VP8D_COMP *pbi) { int i; @@ -125,6 +129,16 @@ static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd) vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, xd->dst.uv_stride); +#ifdef DEC_DEBUG + if (dec_debug) { + int i, j; + printf("Generating predictors\n"); + for (i=0;i<16;i++) { + for (j=0;j<16;j++) printf("%3d ", xd->dst.y_buffer[i*xd->dst.y_stride+j]); + printf("\n"); + } + } +#endif } } @@ -192,7 +206,28 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, } else { - eobtotal = vp8_decode_mb_tokens(pbi, xd); + +#if CONFIG_T8X8 + for(i = 0; i < 25; i++) + { + xd->block[i].eob = 0; + xd->eobs[i] = 0; + } + if (xd->mode_info_context->mbmi.segment_id >= 2) + eobtotal = vp8_decode_mb_tokens_8x8(pbi, xd); + else +#endif + eobtotal = vp8_decode_mb_tokens(pbi, xd); +#ifdef DEC_DEBUG + if (dec_debug) { + printf("\nTokens (%d)\n", eobtotal); + for (i =0; i<400; i++) { + printf("%3d ", xd->qcoeff[i]); + if (i%16 == 15) printf("\n"); + } + printf("\n"); + } +#endif } /* Perform temporary clamping of the MV to be used for prediction */ @@ -206,14 +241,13 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, if (eobtotal == 0 && mode != B_PRED && mode != SPLITMV) { /* Special case: Force the loopfilter to skip when eobtotal and - * mb_skip_coeff are zero. - * */ + * mb_skip_coeff are zero. 
+ * */ xd->mode_info_context->mbmi.mb_skip_coeff = 1; skip_recon_mb(pbi, xd); return; } - if (xd->segmentation_enabled) mb_init_dequantizer(pbi, xd); @@ -225,7 +259,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, if (mode != B_PRED) { RECON_INVOKE(&pbi->common.rtcd.recon, - build_intra_predictors_mby)(xd); + build_intra_predictors_mby)(xd); } else { vp8_intra_prediction_down_copy(xd); } @@ -241,8 +275,8 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, vp8dx_bool_error(xd->current_bc))) { /* MB with corrupt residuals or corrupt mode/motion vectors. - * Better to use the predictor as reconstruction. - */ + * Better to use the predictor as reconstruction. + */ vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); vp8_conceal_corrupt_mb(xd); return; @@ -256,7 +290,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, { BLOCKD *b = &xd->block[i]; RECON_INVOKE(RTCD_VTABLE(recon), intra4x4_predict) - (b, b->bmi.as_mode, b->predictor); + (b, b->bmi.as_mode, b->predictor); if (xd->eobs[i] > 1) { @@ -272,14 +306,25 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, ((int *)b->qcoeff)[0] = 0; } } - } else if (mode == SPLITMV) { - DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block) - (xd->qcoeff, xd->block[0].dequant, - xd->predictor, xd->dst.y_buffer, - xd->dst.y_stride, xd->eobs); +#if CONFIG_T8X8 + if(xd->mode_info_context->mbmi.segment_id >= 2) + { + DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block_8x8) + (xd->qcoeff, xd->block[0].dequant, + xd->predictor, xd->dst.y_buffer, + xd->dst.y_stride, xd->eobs, xd); + } + else +#endif + { + DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block) + (xd->qcoeff, xd->block[0].dequant, + xd->predictor, xd->dst.y_buffer, + xd->dst.y_stride, xd->eobs); + } } else { @@ -288,10 +333,23 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, DEQUANT_INVOKE(&pbi->dequant, block)(b); /* do 2nd order transform on the dc block */ - if (xd->eobs[24] > 1) +#if CONFIG_T8X8 + 
if(xd->mode_info_context->mbmi.segment_id >= 2) { - IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff); - ((int *)b->qcoeff)[0] = 0; + DEQUANT_INVOKE(&pbi->dequant, block_8x8)(b); +#ifdef DEC_DEBUG + if (dec_debug) + { + int j; + printf("DQcoeff Haar\n"); + for (j=0;j<16;j++) { + printf("%d ", b->dqcoeff[j]); + } + printf("\n"); + } +#endif + IDCT_INVOKE(RTCD_VTABLE(idct), ihaar2)(&b->dqcoeff[0], b->diff, 8); + ((int *)b->qcoeff)[0] = 0;//2nd order block are set to 0 after inverse transform ((int *)b->qcoeff)[1] = 0; ((int *)b->qcoeff)[2] = 0; ((int *)b->qcoeff)[3] = 0; @@ -299,24 +357,61 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, ((int *)b->qcoeff)[5] = 0; ((int *)b->qcoeff)[6] = 0; ((int *)b->qcoeff)[7] = 0; + DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block_8x8) + (xd->qcoeff, xd->block[0].dequant, + xd->predictor, xd->dst.y_buffer, + xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd); + } + else +#endif { - IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff); - ((int *)b->qcoeff)[0] = 0; + DEQUANT_INVOKE(&pbi->dequant, block)(b); + if (xd->eobs[24] > 1) + { + IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff); + ((int *)b->qcoeff)[0] = 0; + ((int *)b->qcoeff)[1] = 0; + ((int *)b->qcoeff)[2] = 0; + ((int *)b->qcoeff)[3] = 0; + ((int *)b->qcoeff)[4] = 0; + ((int *)b->qcoeff)[5] = 0; + ((int *)b->qcoeff)[6] = 0; + ((int *)b->qcoeff)[7] = 0; + } + else + { + IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff); + ((int *)b->qcoeff)[0] = 0; + } + + DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block) + (xd->qcoeff, xd->block[0].dequant, + xd->predictor, xd->dst.y_buffer, + xd->dst.y_stride, xd->eobs, xd->block[24].diff); } + } +#if CONFIG_T8X8 + if(xd->mode_info_context->mbmi.segment_id >= 2) + { + DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block_8x8)// + (xd->qcoeff+16*16, xd->block[16].dequant, + xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.uv_stride, xd->eobs+16, 
xd);// + + } + else +#endif + { - DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block) - (xd->qcoeff, xd->block[0].dequant, - xd->predictor, xd->dst.y_buffer, - xd->dst.y_stride, xd->eobs, xd->block[24].diff); + DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block) + (xd->qcoeff+16*16, xd->block[16].dequant, + xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.uv_stride, xd->eobs+16); } - DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block) - (xd->qcoeff+16*16, xd->block[16].dequant, - xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd->eobs+16); -} + } static int get_delta_q(vp8_reader *bc, int prev, int *q_update) @@ -423,6 +518,9 @@ decode_mb_row(VP8D_COMP *pbi, VP8_COMMON *pc, int mb_row, MACROBLOCKD *xd) vp8_build_uvmvs(xd, pc->full_pixel); +#ifdef DEC_DEBUG + dec_debug = (pc->current_video_frame==5 && mb_row==2 && mb_col==3); +#endif /* if(pc->current_video_frame==0 &&mb_col==1 && mb_row==0) pbi->debugoutput =1; @@ -433,7 +531,6 @@ decode_mb_row(VP8D_COMP *pbi, VP8_COMMON *pc, int mb_row, MACROBLOCKD *xd) /* check if the boolean decoder has suffered an error */ xd->corrupted |= vp8dx_bool_error(xd->current_bc); - recon_yoffset += 16; recon_uvoffset += 8; @@ -992,13 +1089,34 @@ int vp8_decode_frame(VP8D_COMP *pbi) } } } +#if CONFIG_T8X8 + { + // read coef probability tree + + for (i = 0; i < BLOCK_TYPES; i++) + for (j = 0; j < COEF_BANDS; j++) + for (k = 0; k < PREV_COEF_CONTEXTS; k++) + for (l = 0; l < MAX_ENTROPY_TOKENS - 1; l++) + { + + vp8_prob *const p = pc->fc.coef_probs_8x8 [i][j][k] + l; + + if (vp8_read(bc, vp8_coef_update_probs_8x8 [i][j][k][l])) + { + *p = (vp8_prob)vp8_read_literal(bc, 8); + + } + } + } +#endif vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->lst_fb_idx], sizeof(YV12_BUFFER_CONFIG)); vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], sizeof(YV12_BUFFER_CONFIG)); #if CONFIG_SEGMENTATION // Create the encoder segmentation map and set all entries to 0 - CHECK_MEM_ERROR(pbi->segmentation_map, 
vpx_calloc((pc->mb_rows * pc->mb_cols), 1)); + if (!pbi->segmentation_map) + CHECK_MEM_ERROR(pbi->segmentation_map, vpx_calloc((pc->mb_rows * pc->mb_cols), 1)); #endif /* set up frame new frame for intra coded blocks */ @@ -1102,6 +1220,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) fclose(f); } #endif + //printf("Frame %d Done\n", frame_count++); return 0; } diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c index dd0c13b7d..0e17cf9af 100644 --- a/vp8/decoder/dequantize.c +++ b/vp8/decoder/dequantize.c @@ -13,13 +13,22 @@ #include "dequantize.h" #include "vp8/common/idct.h" #include "vpx_mem/vpx_mem.h" +#include "onyxd_int.h" extern void vp8_short_idct4x4llm_c(short *input, short *output, int pitch) ; extern void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch); +#if CONFIG_T8X8 +extern void vp8_short_idct8x8_c(short *input, short *output, int pitch); +extern void vp8_short_idct8x8_1_c(short *input, short *output, int pitch); +#endif +#ifdef DEC_DEBUG +extern int dec_debug; +#endif void vp8_dequantize_b_c(BLOCKD *d) { + int i; short *DQ = d->dqcoeff; short *Q = d->qcoeff; @@ -27,7 +36,7 @@ void vp8_dequantize_b_c(BLOCKD *d) for (i = 0; i < 16; i++) { - DQ[i] = Q[i] * DQC[i]; + DQ[i] = Q[i] * DQC[i]; } } @@ -41,12 +50,12 @@ void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred, for (i = 0; i < 16; i++) { - input[i] = dq[i] * input[i]; + input[i] = dq[i] * input[i]; + } /* the idct halves ( >> 1) the pitch */ vp8_short_idct4x4llm_c(input, output, 4 << 1); - vpx_memset(input, 0, 32); for (r = 0; r < 4; r++) @@ -79,18 +88,17 @@ void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred, short *diff_ptr = output; int r, c; + input[0] = (short)Dc; for (i = 1; i < 16; i++) { - input[i] = dq[i] * input[i]; + input[i] = dq[i] * input[i]; } /* the idct halves ( >> 1) the pitch */ vp8_short_idct4x4llm_c(input, output, 4 << 1); - vpx_memset(input, 0, 32); - for (r = 0; r < 4; r++) { for (c = 0; c < 4; c++) @@ -111,3 
+119,211 @@ void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred, pred += pitch; } } + +#if CONFIG_T8X8 +void vp8_dequantize_b_8x8_c(BLOCKD *d)//just for 2x2 haar transform +{ + int i; + short *DQ = d->dqcoeff; + short *Q = d->qcoeff; + short *DQC = d->dequant; + + for (i = 0; i < 16; i++) + { + DQ[i] = (short)(Q[i] * DQC[i]); + } +#ifdef DEC_DEBUG + if (dec_debug) { + int j; + printf("Dequantize 2x2\n"); + for (j=0;j<16;j++) printf("%d ", Q[j]); printf("\n"); + for (j=0;j<16;j++) printf("%d ", DQ[j]); printf("\n"); + } +#endif +} + +void vp8_dequant_idct_add_8x8_c(short *input, short *dq, unsigned char *pred, + unsigned char *dest, int pitch, int stride)//, MACROBLOCKD *xd, short blk_idx +{ + short output[64]; + short *diff_ptr = output; + int r, c, b; + int i; + unsigned char *origdest = dest; + unsigned char *origpred = pred; + +#ifdef DEC_DEBUG + if (dec_debug) { + int j; + printf("Input 8x8\n"); + for (j=0;j<64;j++) { + printf("%d ", input[j]); + if (j%8 == 7) printf("\n"); + } + } +#endif + // recover quantizer for 4 4x4 blocks + for (i = 0; i < 64; i++) + { + input[i]=input[i] * dq[i!=0]; + } +#ifdef DEC_DEBUG + if (dec_debug) { + int j; + printf("Input DQ 8x8\n"); + for (j=0;j<64;j++) { + printf("%d ", input[j]); + if (j%8 == 7) printf("\n"); + } + } +#endif + + // the idct halves ( >> 1) the pitch + vp8_short_idct8x8_c(input, output, 16); +#ifdef DEC_DEBUG + if (dec_debug) { + int j; + printf("Output 8x8\n"); + for (j=0;j<64;j++) { + printf("%d ", output[j]); + if (j%8 == 7) printf("\n"); + } + } +#endif + + vpx_memset(input, 0, 128);// test what should i put here + + for (b = 0; b < 4; b++) + { + for (r = 0; r < 4; r++) + { + for (c = 0; c < 4; c++) + { + int a = diff_ptr[c] + pred[c]; + + if (a < 0) + a = 0; + + if (a > 255) + a = 255; + + dest[c] = (unsigned char) a; + } + + dest += stride; + diff_ptr += 8; + pred += pitch; + } + diff_ptr = output + (b+1) / 2 * 4 * 8 + (b+1) % 2 * 4; + dest = origdest + (b+1) / 2 * 4 * stride + 
(b+1) % 2 * 4; + pred = origpred + (b+1) / 2 * 4 * pitch + (b+1) % 2 * 4; + } +#ifdef DEC_DEBUG + if (dec_debug) { + int k,j; + printf("Final 8x8\n"); + for (j=0;j<8;j++) { + for (k=0;k<8;k++) { + printf("%d ", origdest[k]); + } + printf("\n"); + origdest+=stride; + } + } +#endif +} + +void vp8_dequant_dc_idct_add_8x8_c(short *input, short *dq, unsigned char *pred, + unsigned char *dest, int pitch, int stride, + int Dc)// Dc for 1st order T in some rear case +{ + short output[64]; + short *diff_ptr = output; + int r, c, b; + int i; + unsigned char *origdest = dest; + unsigned char *origpred = pred; + + input[0] = (short)Dc;//Dc is the reconstructed value, do not need dequantization + //dc value is recovered after dequantization, since dc need not quantization +#ifdef DEC_DEBUG + if (dec_debug) { + int j; + printf("Input 8x8\n"); + for (j=0;j<64;j++) { + printf("%d ", input[j]); + if (j%8 == 7) printf("\n"); + } + } +#endif + for (i = 1; i < 64; i++) + { + input[i] = input[i] * dq[i!=0]; + } + +#ifdef DEC_DEBUG + if (dec_debug) { + int j; + printf("Input DQ 8x8\n"); + for (j=0;j<64;j++) { + printf("%d ", input[j]); + if (j%8 == 7) printf("\n"); + } + } +#endif + + // the idct halves ( >> 1) the pitch + vp8_short_idct8x8_c(input, output,16); +#ifdef DEC_DEBUG + if (dec_debug) { + int j; + printf("Output 8x8\n"); + for (j=0;j<64;j++) { + printf("%d ", output[j]); + if (j%8 == 7) printf("\n"); + } + } +#endif + vpx_memset(input, 0, 128); + + for (b = 0; b < 4; b++) + { + for (r = 0; r < 4; r++) + { + for (c = 0; c < 4; c++) + { + int a = diff_ptr[c] + pred[c]; + + if (a < 0) + a = 0; + + if (a > 255) + a = 255; + + dest[c] = (unsigned char) a; + } + + dest += stride; + diff_ptr += 8; + pred += pitch; + } + diff_ptr = output + (b+1) / 2 * 4 * 8 + (b+1) % 2 * 4; + dest = origdest + (b+1) / 2 * 4 * stride + (b+1) % 2 * 4; + pred = origpred + (b+1) / 2 * 4 * pitch + (b+1) % 2 * 4; + } +#ifdef DEC_DEBUG + if (dec_debug) { + int k,j; + printf("Final 8x8\n"); + for 
(j=0;j<8;j++) { + for (k=0;k<8;k++) { + printf("%d ", origdest[k]); + } + printf("\n"); + origdest+=stride; + } + } +#endif +} + +#endif diff --git a/vp8/decoder/dequantize.h b/vp8/decoder/dequantize.h index 2e662a593..15d5bbe91 100644 --- a/vp8/decoder/dequantize.h +++ b/vp8/decoder/dequantize.h @@ -42,6 +42,25 @@ unsigned char *pre, unsigned char *dst_u, \ unsigned char *dst_v, int stride, char *eobs) +#if 1//CONFIG_T8X8 +#define prototype_dequant_dc_idct_add_y_block_8x8(sym) \ + void sym(short *q, short *dq, \ + unsigned char *pre, unsigned char *dst, \ + int stride, char *eobs, short *dc, MACROBLOCKD *xd) + +#define prototype_dequant_idct_add_y_block_8x8(sym) \ + void sym(short *q, short *dq, \ + unsigned char *pre, unsigned char *dst, \ + int stride, char *eobs, MACROBLOCKD *xd) + +#define prototype_dequant_idct_add_uv_block_8x8(sym) \ + void sym(short *q, short *dq, \ + unsigned char *pre, unsigned char *dst_u, \ + unsigned char *dst_v, int stride, char *eobs, \ + MACROBLOCKD *xd) + +#endif + #if ARCH_X86 || ARCH_X86_64 #include "x86/dequantize_x86.h" #endif @@ -80,6 +99,38 @@ extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block); #endif extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block); +#if CONFIG_T8X8 +#ifndef vp8_dequant_block_8x8 +#define vp8_dequant_block_8x8 vp8_dequantize_b_8x8_c +#endif +extern prototype_dequant_block(vp8_dequant_block_8x8); + +#ifndef vp8_dequant_idct_add_8x8 +#define vp8_dequant_idct_add_8x8 vp8_dequant_idct_add_8x8_c +#endif +extern prototype_dequant_idct_add(vp8_dequant_idct_add_8x8); + +#ifndef vp8_dequant_dc_idct_add_8x8 +#define vp8_dequant_dc_idct_add_8x8 vp8_dequant_dc_idct_add_8x8_c +#endif +extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_8x8); + +#ifndef vp8_dequant_dc_idct_add_y_block_8x8 +#define vp8_dequant_dc_idct_add_y_block_8x8 vp8_dequant_dc_idct_add_y_block_8x8_c +#endif +extern prototype_dequant_dc_idct_add_y_block_8x8(vp8_dequant_dc_idct_add_y_block_8x8); + 
+#ifndef vp8_dequant_idct_add_y_block_8x8 +#define vp8_dequant_idct_add_y_block_8x8 vp8_dequant_idct_add_y_block_8x8_c +#endif +extern prototype_dequant_idct_add_y_block_8x8(vp8_dequant_idct_add_y_block_8x8); + +#ifndef vp8_dequant_idct_add_uv_block_8x8 +#define vp8_dequant_idct_add_uv_block_8x8 vp8_dequant_idct_add_uv_block_8x8_c +#endif +extern prototype_dequant_idct_add_uv_block_8x8(vp8_dequant_idct_add_uv_block_8x8); + +#endif typedef prototype_dequant_block((*vp8_dequant_block_fn_t)); @@ -93,6 +144,13 @@ typedef prototype_dequant_idct_add_y_block((*vp8_dequant_idct_add_y_block_fn_t)) typedef prototype_dequant_idct_add_uv_block((*vp8_dequant_idct_add_uv_block_fn_t)); +#if CONFIG_T8X8 +typedef prototype_dequant_dc_idct_add_y_block_8x8((*vp8_dequant_dc_idct_add_y_block_fn_t_8x8)); + +typedef prototype_dequant_idct_add_y_block_8x8((*vp8_dequant_idct_add_y_block_fn_t_8x8)); + +typedef prototype_dequant_idct_add_uv_block_8x8((*vp8_dequant_idct_add_uv_block_fn_t_8x8)); +#endif typedef struct { vp8_dequant_block_fn_t block; @@ -101,6 +159,14 @@ typedef struct vp8_dequant_dc_idct_add_y_block_fn_t dc_idct_add_y_block; vp8_dequant_idct_add_y_block_fn_t idct_add_y_block; vp8_dequant_idct_add_uv_block_fn_t idct_add_uv_block; +#if CONFIG_T8X8 + vp8_dequant_block_fn_t block_8x8; + vp8_dequant_idct_add_fn_t idct_add_8x8; + vp8_dequant_dc_idct_add_fn_t dc_idct_add_8x8; + vp8_dequant_dc_idct_add_y_block_fn_t_8x8 dc_idct_add_y_block_8x8; + vp8_dequant_idct_add_y_block_fn_t_8x8 idct_add_y_block_8x8; + vp8_dequant_idct_add_uv_block_fn_t_8x8 idct_add_uv_block_8x8; +#endif } vp8_dequant_rtcd_vtable_t; #if CONFIG_RUNTIME_CPU_DETECT diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c index 78d583782..6d78e6022 100644 --- a/vp8/decoder/detokenize.c +++ b/vp8/decoder/detokenize.c @@ -26,6 +26,18 @@ DECLARE_ALIGNED(16, static const unsigned char, coef_bands_x[16]) = 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X }; +#if CONFIG_T8X8 
+DECLARE_ALIGNED(64, static const unsigned char, coef_bands_x_8x8[64]) = { + 0 * OCB_X, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 5 * OCB_X, 4 * OCB_X, 4 * OCB_X, 5 * OCB_X, + 5 * OCB_X, 3 * OCB_X, 6 * OCB_X, 3 * OCB_X, 5 * OCB_X, 4 * OCB_X, 6 * OCB_X, 6 * OCB_X, + 6 * OCB_X, 5 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, + 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, + 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, + 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, + 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, + 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, 7 * OCB_X, +}; +#endif #define EOB_CONTEXT_NODE 0 #define ZERO_CONTEXT_NODE 1 #define ONE_CONTEXT_NODE 2 @@ -149,7 +161,48 @@ DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]); range = range - split; \ NORMALIZE \ } - +#if CONFIG_T8X8 +#define DECODE_AND_LOOP_IF_ZERO_8x8_2(probability,branch) \ + { \ + split = 1 + ((( probability*(range-1) ) ) >> 8); \ + bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \ + FILL \ + if ( value < bigsplit ) \ + { \ + range = split; \ + NORMALIZE \ + Prob = coef_probs; \ + if(c<3) {\ + ++c; \ + Prob += coef_bands_x[c]; \ + goto branch; \ + } goto BLOCK_FINISHED_8x8; /*for malformed input */\ + } \ + value -= bigsplit; \ + range = range - split; \ + NORMALIZE \ + } +#define DECODE_AND_LOOP_IF_ZERO_8X8(probability,branch) \ + { \ + split = 1 + ((( probability*(range-1) ) ) >> 8); \ + bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \ + FILL \ + if ( value < bigsplit ) \ + { \ + range = split; \ + NORMALIZE \ + Prob = coef_probs; \ + if(c<63) {\ + ++c; \ + Prob += coef_bands_x_8x8[c]; \ + goto branch; \ + } goto BLOCK_FINISHED_8x8; /*for malformed input */\ + } \ + value -= bigsplit; \ + range = range - split; \ + NORMALIZE \ + } 
+#endif #define DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val) \ DECODE_AND_APPLYSIGN(val) \ Prob = coef_probs + (ENTROPY_NODES*2); \ @@ -160,6 +213,26 @@ DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]); qcoeff_ptr [ scan[15] ] = (INT16) v; \ goto BLOCK_FINISHED; +#if CONFIG_T8X8 +#define DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val) \ + DECODE_AND_APPLYSIGN(val) \ + Prob = coef_probs + (ENTROPY_NODES*2); \ + if(c < 3){\ + qcoeff_ptr [ scan[c] ] = (INT16) v; \ + ++c; \ + goto DO_WHILE_8x8; }\ + qcoeff_ptr [ scan[3] ] = (INT16) v; \ + goto BLOCK_FINISHED_8x8; +#define DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(val) \ + DECODE_AND_APPLYSIGN(val) \ + Prob = coef_probs + (ENTROPY_NODES*2); \ + if(c < 63){\ + qcoeff_ptr [ scan[c] ] = (INT16) v; \ + ++c; \ + goto DO_WHILE_8x8; }\ + qcoeff_ptr [ scan[63] ] = (INT16) v; \ + goto BLOCK_FINISHED_8x8; +#endif #define DECODE_EXTRABIT_AND_ADJUST_VAL(t,bits_count)\ split = 1 + (((range-1) * vp8d_token_extra_bits2[t].Probs[bits_count]) >> 8); \ @@ -177,6 +250,354 @@ DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]); }\ NORMALIZE +#if CONFIG_T8X8 +int vp8_decode_mb_tokens_8x8(VP8D_COMP *dx, MACROBLOCKD *x) +{ + ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context; + ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)x->left_context; + const VP8_COMMON *const oc = & dx->common; + + BOOL_DECODER *bc = x->current_bc; + + char *eobs = x->eobs; + + ENTROPY_CONTEXT *a, *a1; + ENTROPY_CONTEXT *l, *l1; + int i; + + int eobtotal = 0; + + register int count; + + const BOOL_DATA *bufptr; + const BOOL_DATA *bufend; + register unsigned int range; + VP8_BD_VALUE value; + const int *scan;// + register unsigned int shift; + UINT32 split; + VP8_BD_VALUE bigsplit; + INT16 *qcoeff_ptr; + + const vp8_prob *coef_probs;// + int type; + int stop; + INT16 val, bits_count; + INT16 c; + INT16 v; + const vp8_prob *Prob;// + + type = 3; + i = 0; + stop = 16; + + scan = vp8_default_zig_zag1d_8x8; + qcoeff_ptr = &x->qcoeff[0]; + + if 
(x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV) + { + i = 24; + stop = 24; + type = 1; + qcoeff_ptr += 24*16; + eobtotal -= 4; + scan = vp8_default_zig_zag1d; + } + + bufend = bc->user_buffer_end; + bufptr = bc->user_buffer; + value = bc->value; + count = bc->count; + range = bc->range; + + coef_probs = oc->fc.coef_probs_8x8 [type] [ 0 ] [0]; + +BLOCK_LOOP_8x8: + a = A + vp8_block2above[i]; + l = L + vp8_block2left[i]; + + if(i < 16) + { + a1 = A + vp8_block2above[i+1]; + l1 = L + vp8_block2left[i+4]; + } + else if(i<24) + { + a1 = A + vp8_block2above[i+1]; + l1 = L + vp8_block2left[i+2]; + + } + c = (INT16)(!type); + +// Dest = ((A)!=0) + ((B)!=0); + if(i==24) + { + VP8_COMBINEENTROPYCONTEXTS(v, *a, *l); + } + else + { + VP8_COMBINEENTROPYCONTEXTS_8x8(v, *a, *l, *a1, *l1); + } + + Prob = coef_probs; + Prob += v * ENTROPY_NODES; + +DO_WHILE_8x8: + if(i==24) + Prob += coef_bands_x[c]; + else + Prob += coef_bands_x_8x8[c]; + DECODE_AND_BRANCH_IF_ZERO(Prob[EOB_CONTEXT_NODE], BLOCK_FINISHED_8x8); + +CHECK_0_8x8_: + if (i==24) + { + DECODE_AND_LOOP_IF_ZERO_8x8_2(Prob[ZERO_CONTEXT_NODE], CHECK_0_8x8_); + } + else + { + DECODE_AND_LOOP_IF_ZERO_8X8(Prob[ZERO_CONTEXT_NODE], CHECK_0_8x8_); + } + DECODE_AND_BRANCH_IF_ZERO(Prob[ONE_CONTEXT_NODE], ONE_CONTEXT_NODE_0_8x8_); + DECODE_AND_BRANCH_IF_ZERO(Prob[LOW_VAL_CONTEXT_NODE], LOW_VAL_CONTEXT_NODE_0_8x8_); + DECODE_AND_BRANCH_IF_ZERO(Prob[HIGH_LOW_CONTEXT_NODE], HIGH_LOW_CONTEXT_NODE_0_8x8_); + DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREEFOUR_CONTEXT_NODE], CAT_THREEFOUR_CONTEXT_NODE_0_8x8_); + DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_FIVE_CONTEXT_NODE], CAT_FIVE_CONTEXT_NODE_0_8x8_); + val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].min_val; + bits_count = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].Length; + + do + { + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY6, bits_count); + bits_count -- ; + } + while (bits_count >= 0); + if(i==24) + { + 
DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val); + } + else + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(val); + } + +CAT_FIVE_CONTEXT_NODE_0_8x8_: + val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY5].min_val; + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 4); + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 3); + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 2); + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 1); + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 0); + if(i==24) + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val); + } + else + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(val); + } + +CAT_THREEFOUR_CONTEXT_NODE_0_8x8_: + DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREE_CONTEXT_NODE], CAT_THREE_CONTEXT_NODE_0_8x8_); + val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY4].min_val; + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 3); + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 2); + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 1); + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 0); + if(i==24) + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val); + } + else + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(val); + } + +CAT_THREE_CONTEXT_NODE_0_8x8_: + val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY3].min_val; + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 2); + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 1); + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 0); + if(i==24) + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val); + } + else + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(val); + } + +HIGH_LOW_CONTEXT_NODE_0_8x8_: + DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_ONE_CONTEXT_NODE], CAT_ONE_CONTEXT_NODE_0_8x8_); + + val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY2].min_val; + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 1); + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 0); + if(i==24) + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val); + } + else + { + 
DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(val); + } + +CAT_ONE_CONTEXT_NODE_0_8x8_: + val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY1].min_val; + DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY1, 0); + if(i==24) + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(val); + } + else + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(val); + } + +LOW_VAL_CONTEXT_NODE_0_8x8_: + DECODE_AND_BRANCH_IF_ZERO(Prob[TWO_CONTEXT_NODE], TWO_CONTEXT_NODE_0_8x8_); + DECODE_AND_BRANCH_IF_ZERO(Prob[THREE_CONTEXT_NODE], THREE_CONTEXT_NODE_0_8x8_); + if(i==24) + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(4); + } + else + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(4); + } + + +THREE_CONTEXT_NODE_0_8x8_: + if(i==24) + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(3); + } + else + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(3); + } + + +TWO_CONTEXT_NODE_0_8x8_: + if(i==24) + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8_2(2); + } + else + { + DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT_8x8(2); + } + + +ONE_CONTEXT_NODE_0_8x8_: + DECODE_AND_APPLYSIGN(1); + Prob = coef_probs + ENTROPY_NODES; + + if (i==24) + { + if (c < 3)//15 + { + qcoeff_ptr [ scan[c] ] = (INT16) v; + ++c; + goto DO_WHILE_8x8; + } + } + else + { + if (c < 63) + { + qcoeff_ptr [ scan[c] ] = (INT16) v; + ++c; + goto DO_WHILE_8x8; + } + } + + if(i==24) + qcoeff_ptr [ scan[3] ] = (INT16) v;//15 + else + qcoeff_ptr [ scan[63] ] = (INT16) v; + + +BLOCK_FINISHED_8x8: + *a = *l = ((eobs[i] = c) != !type); // any nonzero data? 
+ /*if (i!=24) { + *(A + vp8_block2above[i+1]) = *(A + vp8_block2above[i+2]) = *(A + vp8_block2above[i+3]) = *a; + *(L + vp8_block2left[i+1]) = *(L + vp8_block2left[i+2]) = *(L + vp8_block2left[i+3]) = *l; + }*/ + + if (i!=24) + { + if(i==0) + { + *(A + vp8_block2above[1]) = *(A + vp8_block2above[4]) = *(A + vp8_block2above[5]) = *a; + *(L + vp8_block2left[1]) = *(L + vp8_block2left[4]) = *(L + vp8_block2left[5]) = *l; + } + else if(i==4) + { + *(A + vp8_block2above[2]) = *(A + vp8_block2above[3]) = *(A + vp8_block2above[6]) = *(A + vp8_block2above[7]) = *a; + *(L + vp8_block2left[2]) = *(L + vp8_block2left[3]) = *(L + vp8_block2left[6]) = *(L + vp8_block2left[7]) = *l; + *(A + vp8_block2above[4]) = *(A + vp8_block2above[1]); + *(L + vp8_block2left[4]) = *(L + vp8_block2left[1]); + } + else if(i==8) + { + *(A + vp8_block2above[9]) = *(A + vp8_block2above[12]) = *(A + vp8_block2above[13]) = *a; + *(L + vp8_block2left[9]) = *(L + vp8_block2left[12]) = *(L + vp8_block2left[13]) = *l; + + } + else if(i==12) + { + *(A + vp8_block2above[10]) = *(A + vp8_block2above[11]) = *(A + vp8_block2above[14]) = *(A + vp8_block2above[15]) = *a; + *(L + vp8_block2left[10]) = *(L + vp8_block2left[11]) = *(L + vp8_block2left[14]) = *(L + vp8_block2left[15]) = *l; + *(A + vp8_block2above[12]) = *(A + vp8_block2above[8]); + *(L + vp8_block2left[12]) = *(L + vp8_block2left[8]); + + } + else + { + *(A + vp8_block2above[i+1]) = *(A + vp8_block2above[i+2]) = *(A + vp8_block2above[i+3]) = *a; + *(L + vp8_block2left[i+1]) = *(L + vp8_block2left[i+2]) = *(L + vp8_block2left[i+3]) = *l; + + } + } + + eobtotal += c; + qcoeff_ptr += (i==24 ? 
16 : 64); + + i+=4; + + if (i < stop) + goto BLOCK_LOOP_8x8; + + if (i > 24) + { + type = 0; + i = 0; + stop = 16; + coef_probs = oc->fc.coef_probs_8x8 [type] [ 0 ] [0]; + qcoeff_ptr -= (24*16 + 16); + scan = vp8_default_zig_zag1d_8x8; + goto BLOCK_LOOP_8x8; + } + + if (i == 16) + { + type = 2; + coef_probs = oc->fc.coef_probs_8x8 [type] [ 0 ] [0]; + stop = 24; + goto BLOCK_LOOP_8x8; + } + + FILL + bc->user_buffer = bufptr; + bc->value = value; + bc->count = count; + bc->range = range; + + return eobtotal; + +} +#endif int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x) { ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context; @@ -366,6 +787,7 @@ BLOCK_FINISHED: bc->value = value; bc->count = count; bc->range = range; + return eobtotal; } diff --git a/vp8/decoder/detokenize.h b/vp8/decoder/detokenize.h index 8640bda4c..c5305bb67 100644 --- a/vp8/decoder/detokenize.h +++ b/vp8/decoder/detokenize.h @@ -16,5 +16,8 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x); int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *); +#if CONFIG_T8X8 +int vp8_decode_mb_tokens_8x8(VP8D_COMP *, MACROBLOCKD *); +#endif #endif /* DETOKENIZE_H */ diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c index f76653231..fc5fdb39b 100644 --- a/vp8/decoder/generic/dsystemdependent.c +++ b/vp8/decoder/generic/dsystemdependent.c @@ -21,6 +21,17 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi) /* Pure C: */ #if CONFIG_RUNTIME_CPU_DETECT pbi->mb.rtcd = &pbi->common.rtcd; + +#if CONFIG_T8X8 + + pbi->dequant.block_8x8 = vp8_dequantize_b_8x8_c; + pbi->dequant.idct_add_8x8 = vp8_dequant_idct_add_8x8_c; + pbi->dequant.dc_idct_add_8x8 = vp8_dequant_dc_idct_add_8x8_c; + pbi->dequant.dc_idct_add_y_block_8x8 = vp8_dequant_dc_idct_add_y_block_8x8_c; + pbi->dequant.idct_add_y_block_8x8 = vp8_dequant_idct_add_y_block_8x8_c; + pbi->dequant.idct_add_uv_block_8x8 = vp8_dequant_idct_add_uv_block_8x8_c; + +#endif pbi->dequant.block = vp8_dequantize_b_c; 
pbi->dequant.idct_add = vp8_dequant_idct_add_c; pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_c; diff --git a/vp8/decoder/idct_blk.c b/vp8/decoder/idct_blk.c index df0192354..44a223157 100644 --- a/vp8/decoder/idct_blk.c +++ b/vp8/decoder/idct_blk.c @@ -30,10 +30,13 @@ void vp8_dequant_dc_idct_add_y_block_c { for (j = 0; j < 4; j++) { - if (*eobs++ > 1) - vp8_dequant_dc_idct_add_c (q, dq, pre, dst, 16, stride, dc[0]); - else + if (*eobs++ <= 1) + { vp8_dc_only_idct_add_c (dc[0], pre, dst, 16, stride); + } + + else + vp8_dequant_dc_idct_add_c (q, dq, pre, dst, 16, stride, dc[0]); q += 16; pre += 4; @@ -56,14 +59,13 @@ void vp8_dequant_idct_add_y_block_c { for (j = 0; j < 4; j++) { - if (*eobs++ > 1) - vp8_dequant_idct_add_c (q, dq, pre, dst, 16, stride); - else + if (*eobs++ <= 1) { vp8_dc_only_idct_add_c (q[0]*dq[0], pre, dst, 16, stride); ((int *)q)[0] = 0; } - + else + vp8_dequant_idct_add_c (q, dq, pre, dst, 16, stride); q += 16; pre += 4; dst += 4; @@ -84,14 +86,13 @@ void vp8_dequant_idct_add_uv_block_c { for (j = 0; j < 2; j++) { - if (*eobs++ > 1) - vp8_dequant_idct_add_c (q, dq, pre, dstu, 8, stride); - else + if (*eobs++ <= 1) { vp8_dc_only_idct_add_c (q[0]*dq[0], pre, dstu, 8, stride); ((int *)q)[0] = 0; } - + else + vp8_dequant_idct_add_c (q, dq, pre, dstu, 8, stride); q += 16; pre += 4; dstu += 4; @@ -105,14 +106,14 @@ void vp8_dequant_idct_add_uv_block_c { for (j = 0; j < 2; j++) { - if (*eobs++ > 1) - vp8_dequant_idct_add_c (q, dq, pre, dstv, 8, stride); - else + if (*eobs++ <= 1) { vp8_dc_only_idct_add_c (q[0]*dq[0], pre, dstv, 8, stride); ((int *)q)[0] = 0; } + else + vp8_dequant_idct_add_c (q, dq, pre, dstv, 8, stride); q += 16; pre += 4; dstv += 4; @@ -122,3 +123,45 @@ void vp8_dequant_idct_add_uv_block_c dstv += 4*stride - 8; } } + +#if CONFIG_T8X8 +void vp8_dequant_dc_idct_add_y_block_8x8_c + (short *q, short *dq, unsigned char *pre, + unsigned char *dst, int stride, char *eobs, short *dc, MACROBLOCKD *xd) +{ + + 
vp8_dequant_dc_idct_add_8x8_c (q, dq, pre, dst, 16, stride, dc[0]); + vp8_dequant_dc_idct_add_8x8_c (&q[64], dq, pre+8, dst+8, 16, stride, dc[1]); + vp8_dequant_dc_idct_add_8x8_c (&q[128], dq, pre+8*16, dst+8*stride, 16, stride, dc[4]); + vp8_dequant_dc_idct_add_8x8_c (&q[192], dq, pre+8*16+8, dst+8*stride+8, 16, stride, dc[8]); + +} + +void vp8_dequant_idct_add_y_block_8x8_c + (short *q, short *dq, unsigned char *pre, + unsigned char *dst, int stride, char *eobs, MACROBLOCKD *xd) +{ + + + unsigned char *origdest = dst; + unsigned char *origpred = pre; + + vp8_dequant_idct_add_8x8_c (q, dq, pre, dst, 16, stride); + vp8_dequant_idct_add_8x8_c (&q[64], dq, origpred+8, origdest+8, 16, stride); + vp8_dequant_idct_add_8x8_c (&q[128], dq, origpred+8*16, origdest+8*stride, 16, stride); + vp8_dequant_idct_add_8x8_c (&q[192], dq, origpred+8*16+8, origdest+8*stride+8, 16, stride); + +} + +void vp8_dequant_idct_add_uv_block_8x8_c + (short *q, short *dq, unsigned char *pre, + unsigned char *dstu, unsigned char *dstv, int stride, char *eobs, MACROBLOCKD *xd) +{ + vp8_dequant_idct_add_8x8_c (q, dq, pre, dstu, 8, stride); + + q += 64; + pre += 64; + + vp8_dequant_idct_add_8x8_c (q, dq, pre, dstv, 8, stride); +} +#endif diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c index f6052b437..f46b6d578 100644 --- a/vp8/decoder/onyxd_if.c +++ b/vp8/decoder/onyxd_if.c @@ -43,6 +43,43 @@ extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi); static int get_free_fb (VP8_COMMON *cm); static void ref_cnt_fb (int *buf, int *idx, int new_idx); +#if CONFIG_DEBUG +void vp8_recon_write_yuv_frame(char *name, YV12_BUFFER_CONFIG *s) +{ + FILE *yuv_file = fopen((char *)name, "ab"); + unsigned char *src = s->y_buffer; + int h = s->y_height; + if (!yuv_file) return; /* dump file could not be opened: skip instead of passing NULL to fwrite/fclose */ + do + { + fwrite(src, s->y_width, 1, yuv_file); + src += s->y_stride; + } + while (--h); + + src = s->u_buffer; + h = s->uv_height; + + do + { + fwrite(src, s->uv_width, 1, yuv_file); + src += s->uv_stride; + } + while (--h); + + src = s->v_buffer; + h = 
s->uv_height; + + do + { + fwrite(src, s->uv_width, 1, yuv_file); + src += s->uv_stride; + } + while (--h); + + fclose(yuv_file); +} +#endif void vp8dx_initialize() { @@ -120,7 +157,7 @@ void vp8dx_remove_decompressor(VP8D_PTR ptr) if (!pbi) return; #if CONFIG_SEGMENTATION - // Delete sementation map + // Delete sementation map if (pbi->segmentation_map != 0) vpx_free(pbi->segmentation_map); #endif @@ -298,6 +335,22 @@ static int swap_frame_buffers (VP8_COMMON *cm) return err; } +/* +static void vp8_print_yuv_rec_mb(VP8_COMMON *cm, int mb_row, int mb_col) +{ + YV12_BUFFER_CONFIG *s = cm->frame_to_show; + unsigned char *src = s->y_buffer; + int i, j; + + printf("After loop filter\n"); + for (i=0;i<16;i++) { + for (j=0;j<16;j++) + printf("%3d ", src[(mb_row*16+i)*s->y_stride + mb_col*16+j]); + printf("\n"); + } +} +*/ + int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsigned char *source, INT64 time_stamp) { #if HAVE_ARMV7 @@ -461,6 +514,8 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign { /* Apply the loop filter if appropriate. 
*/ vp8_loop_filter_frame(cm, &pbi->mb, cm->filter_level); + //vp8_print_yuv_rec_mb(cm, 9, 10); + cm->last_frame_type = cm->frame_type; cm->last_filter_type = cm->filter_type; @@ -469,7 +524,9 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show); } +#if CONFIG_DEBUG vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show); +#endif vp8_clear_system_state(); @@ -498,6 +555,7 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign /*vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);*/ + //printf("Decoded frame (%d) %d\n", cm->show_frame, cm->current_video_frame); if (cm->show_frame) cm->current_video_frame++; diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h index 591fa4f23..01607df1d 100644 --- a/vp8/decoder/onyxd_int.h +++ b/vp8/decoder/onyxd_int.h @@ -21,6 +21,8 @@ #include "ec_types.h" #endif +//#define DEC_DEBUG + typedef struct { int ithread; @@ -52,11 +54,17 @@ typedef struct typedef struct { int const *scan; +#if CONFIG_T8X8 + int const *scan_8x8; +#endif UINT8 const *ptr_block2leftabove; vp8_tree_index const *vp8_coef_tree_ptr; TOKENEXTRABITS const *teb_base_ptr; unsigned char *norm_ptr; UINT8 *ptr_coef_bands_x; +#if CONFIG_T8X8 + UINT8 *ptr_coef_bands_x_8x8; +#endif ENTROPY_CONTEXT_PLANES *A; ENTROPY_CONTEXT_PLANES *L; @@ -65,6 +73,9 @@ typedef struct BOOL_DECODER *current_bc; vp8_prob const *coef_probs[4]; +#if CONFIG_T8X8 + vp8_prob const *coef_probs_8x8[4]; +#endif UINT8 eob[25]; diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c index 5bdeb917f..3541269c5 100644 --- a/vp8/encoder/bitstream.c +++ b/vp8/encoder/bitstream.c @@ -53,11 +53,19 @@ unsigned __int64 Sectionbits[500]; #ifdef ENTROPY_STATS int intra_mode_stats[10][10][10]; static unsigned int tree_update_hist [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] [2]; +#if CONFIG_T8X8 +static unsigned int 
tree_update_hist_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] [2]; +#endif + extern unsigned int active_section; #endif #ifdef MODE_STATS int count_mb_seg[4] = { 0, 0, 0, 0 }; +#if CONFIG_SEGMENTATION +int segment_modes_intra[MAX_MB_SEGMENTS] = { 0, 0, 0, 0 }; +int segment_modes_inter[MAX_MB_SEGMENTS] = { 0, 0, 0, 0 }; +#endif #endif @@ -184,6 +192,10 @@ static void pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount) n--; i = 2; } + if (n == 0) { + printf("Fatal Error: n=0\n"); + fflush(stdout); + } do { @@ -860,8 +872,6 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) const MV_CONTEXT *mvc = pc->fc.mvc; MACROBLOCKD *xd = &cpi->mb.e_mbd; #if CONFIG_SEGMENTATION - int left_id, above_id; - int i; int sum; int index = 0; #endif @@ -948,6 +958,11 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) active_section = 9; #endif +#ifdef MODE_STATS +#if CONFIG_SEGMENTATION + segment_modes_inter[mi->segment_id]++; +#endif +#endif if (cpi->mb.e_mbd.update_mb_segmentation_map) { #if CONFIG_SEGMENTATION @@ -1117,8 +1132,6 @@ static void write_kfmodes(VP8_COMP *cpi) /* const */ MODE_INFO *m = c->mi; #if CONFIG_SEGMENTATION - int left_id, above_id; - int i; int index = 0; #endif int mb_row = -1; @@ -1150,6 +1163,12 @@ static void write_kfmodes(VP8_COMP *cpi) xd->up_available = (mb_row != 0); xd->left_available = (mb_col != 0); #endif +#ifdef MODE_STATS +#if CONFIG_SEGMENTATION + segment_modes_intra[m->mbmi.segment_id]++; +#endif +#endif + if (cpi->mb.e_mbd.update_mb_segmentation_map) { #if CONFIG_SEGMENTATION @@ -1389,6 +1408,7 @@ static int default_coef_context_savings(VP8_COMP *cpi) int vp8_estimate_entropy_savings(VP8_COMP *cpi) { int savings = 0; + int i=0; const int *const rfct = cpi->count_mb_ref_frame_usage; const int rf_intra = rfct[INTRA_FRAME]; @@ -1453,6 +1473,65 @@ int vp8_estimate_entropy_savings(VP8_COMP *cpi) savings += default_coef_context_savings(cpi); +#if CONFIG_T8X8 + i = 0; + do + { + int j = 0; + + do + { + int k = 
0; + + do + { + /* at every context */ + + /* calc probs and branch cts for this frame only */ + //vp8_prob new_p [ENTROPY_NODES]; + //unsigned int branch_ct [ENTROPY_NODES] [2]; + + int t = 0; /* token/prob index */ + + vp8_tree_probs_from_distribution( + MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, + cpi->frame_coef_probs_8x8 [i][j][k], cpi->frame_branch_ct_8x8 [i][j][k], cpi->coef_counts_8x8 [i][j][k], + 256, 1 + ); + + do + { + const unsigned int *ct = cpi->frame_branch_ct_8x8 [i][j][k][t]; + const vp8_prob newp = cpi->frame_coef_probs_8x8 [i][j][k][t]; + + const vp8_prob old = cpi->common.fc.coef_probs_8x8 [i][j][k][t]; + const vp8_prob upd = vp8_coef_update_probs_8x8 [i][j][k][t]; + + const int old_b = vp8_cost_branch(ct, old); + const int new_b = vp8_cost_branch(ct, newp); + + const int update_b = 8 + + ((vp8_cost_one(upd) - vp8_cost_zero(upd)) >> 8); + + const int s = old_b - new_b - update_b; + + if (s > 0) + savings += s; + + + } + while (++t < MAX_ENTROPY_TOKENS - 1); + + + } + while (++k < PREV_COEF_CONTEXTS); + } + while (++j < COEF_BANDS); + } + while (++i < BLOCK_TYPES); +#endif + + return savings; } @@ -1579,6 +1658,92 @@ static void update_coef_probs(VP8_COMP *cpi) } while (++i < BLOCK_TYPES); +#if CONFIG_T8X8 + i = 0; + do + { + int j = 0; + + do + { + int k = 0; + + do + { + //note: use result from vp8_estimate_entropy_savings, so no need to call vp8_tree_probs_from_distribution here. 
+ /* at every context */ + + /* calc probs and branch cts for this frame only */ + //vp8_prob new_p [ENTROPY_NODES]; + //unsigned int branch_ct [ENTROPY_NODES] [2]; + + int t = 0; /* token/prob index */ + + //vp8_tree_probs_from_distribution( + // MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree, + // new_p, branch_ct, (unsigned int *)cpi->coef_counts [i][j][k], + // 256, 1 + // ); + + do + { + const unsigned int *ct = cpi->frame_branch_ct_8x8 [i][j][k][t]; + const vp8_prob newp = cpi->frame_coef_probs_8x8 [i][j][k][t]; + + vp8_prob *Pold = cpi->common.fc.coef_probs_8x8 [i][j][k] + t; + const vp8_prob old = *Pold; + const vp8_prob upd = vp8_coef_update_probs_8x8 [i][j][k][t]; + + const int old_b = vp8_cost_branch(ct, old); + const int new_b = vp8_cost_branch(ct, newp); + + const int update_b = 8 + + ((vp8_cost_one(upd) - vp8_cost_zero(upd)) >> 8); + + const int s = old_b - new_b - update_b; + const int u = s > 0 ? 1 : 0; + + vp8_write(w, u, upd); + + +#ifdef ENTROPY_STATS + ++ tree_update_hist_8x8 [i][j][k][t] [u]; +#endif + + if (u) + { + /* send/use new probability */ + + *Pold = newp; + vp8_write_literal(w, newp, 8); + + savings += s; + + } + + } + while (++t < MAX_ENTROPY_TOKENS - 1); + + /* Accum token counts for generation of default statistics */ +#ifdef ENTROPY_STATS + t = 0; + + do + { + context_counters_8x8 [i][j][k][t] += cpi->coef_counts_8x8 [i][j][k][t]; + } + while (++t < MAX_ENTROPY_TOKENS); + +#endif + + } + while (++k < PREV_COEF_CONTEXTS); + } + while (++j < COEF_BANDS); + } + while (++i < BLOCK_TYPES); +#endif + } #ifdef PACKET_TESTING FILE *vpxlogc = 0; @@ -1990,6 +2155,46 @@ void print_tree_update_probs() } fprintf(f, "};\n"); + +#if CONFIG_T8X8 + fprintf(f, "const vp8_prob tree_update_probs_8x8[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] = {\n"); + + for (i = 0; i < BLOCK_TYPES; i++) + { + fprintf(f, " { \n"); + + for (j = 0; j < COEF_BANDS; j++) + { + fprintf(f, " {\n"); + + for (k = 0; k < PREV_COEF_CONTEXTS; k++) + { 
+ fprintf(f, " {"); + + for (l = 0; l < MAX_ENTROPY_TOKENS - 1; l++) + { + Sum = tree_update_hist_8x8[i][j][k][l][0] + tree_update_hist_8x8[i][j][k][l][1]; + + if (Sum > 0) + { + if (((tree_update_hist_8x8[i][j][k][l][0] * 255) / Sum) > 0) + fprintf(f, "%3ld, ", (tree_update_hist_8x8[i][j][k][l][0] * 255) / Sum); + else + fprintf(f, "%3ld, ", 1); + } + else + fprintf(f, "%3ld, ", 128); + } + + fprintf(f, "},\n"); + } + + fprintf(f, " },\n"); + } + + fprintf(f, " },\n"); + } +#endif fclose(f); } #endif diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h index 0d14b545c..8a95db798 100644 --- a/vp8/encoder/block.h +++ b/vp8/encoder/block.h @@ -46,7 +46,7 @@ typedef struct int src; int src_stride; -// MV enc_mv; + // MV enc_mv; int force_empty; } BLOCK; @@ -126,6 +126,12 @@ typedef struct void (*short_walsh4x4)(short *input, short *output, int pitch); void (*quantize_b)(BLOCK *b, BLOCKD *d); void (*quantize_b_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1); + #if CONFIG_T8X8 + void (*vp8_short_fdct8x8)(short *input, short *output, int pitch); + void (*short_fhaar2x2)(short *input, short *output, int pitch); + void (*quantize_b_8x8)(BLOCK *b, BLOCKD *d); + void (*quantize_b_2x2)(BLOCK *b, BLOCKD *d); +#endif } MACROBLOCK; diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c index 69a882c89..9584dc765 100644 --- a/vp8/encoder/dct.c +++ b/vp8/encoder/dct.c @@ -11,6 +11,121 @@ #include #include "vpx_ports/config.h" + + + + + + +void vp8_short_fdct8x8_c(short *block, short *coefs, int pitch) +{ + int j1, i, j, k; + float b[8]; + float b1[8]; + float d[8][8]; + float f0 = (float) .7071068; + float f1 = (float) .4903926; + float f2 = (float) .4619398; + float f3 = (float) .4157348; + float f4 = (float) .3535534; + float f5 = (float) .2777851; + float f6 = (float) .1913417; + float f7 = (float) .0975452; + pitch = pitch / 2; + for (i = 0, k = 0; i < 8; i++, k += pitch) + { + for (j = 0; j < 8; j++) + { + b[j] = (float)( block[k + j]<<1); + } + /* Horizontal transform */ + 
for (j = 0; j < 4; j++) + { + j1 = 7 - j; + b1[j] = b[j] + b[j1]; + b1[j1] = b[j] - b[j1]; + } + b[0] = b1[0] + b1[3]; + b[1] = b1[1] + b1[2]; + b[2] = b1[1] - b1[2]; + b[3] = b1[0] - b1[3]; + b[4] = b1[4]; + b[5] = (b1[6] - b1[5]) * f0; + b[6] = (b1[6] + b1[5]) * f0; + b[7] = b1[7]; + d[i][0] = (b[0] + b[1]) * f4; + d[i][4] = (b[0] - b[1]) * f4; + d[i][2] = b[2] * f6 + b[3] * f2; + d[i][6] = b[3] * f6 - b[2] * f2; + b1[4] = b[4] + b[5]; + b1[7] = b[7] + b[6]; + b1[5] = b[4] - b[5]; + b1[6] = b[7] - b[6]; + d[i][1] = b1[4] * f7 + b1[7] * f1; + d[i][5] = b1[5] * f3 + b1[6] * f5; + d[i][7] = b1[7] * f7 - b1[4] * f1; + d[i][3] = b1[6] * f3 - b1[5] * f5; + } + /* Vertical transform */ + for (i = 0; i < 8; i++) + { + for (j = 0; j < 4; j++) + { + j1 = 7 - j; + b1[j] = d[j][i] + d[j1][i]; + b1[j1] = d[j][i] - d[j1][i]; + } + b[0] = b1[0] + b1[3]; + b[1] = b1[1] + b1[2]; + b[2] = b1[1] - b1[2]; + b[3] = b1[0] - b1[3]; + b[4] = b1[4]; + b[5] = (b1[6] - b1[5]) * f0; + b[6] = (b1[6] + b1[5]) * f0; + b[7] = b1[7]; + d[0][i] = (b[0] + b[1]) * f4; + d[4][i] = (b[0] - b[1]) * f4; + d[2][i] = b[2] * f6 + b[3] * f2; + d[6][i] = b[3] * f6 - b[2] * f2; + b1[4] = b[4] + b[5]; + b1[7] = b[7] + b[6]; + b1[5] = b[4] - b[5]; + b1[6] = b[7] - b[6]; + d[1][i] = b1[4] * f7 + b1[7] * f1; + d[5][i] = b1[5] * f3 + b1[6] * f5; + d[7][i] = b1[7] * f7 - b1[4] * f1; + d[3][i] = b1[6] * f3 - b1[5] * f5; + } + for (i = 0; i < 8; i++) + { + for (j = 0; j < 8; j++) + { + *(coefs + j + i * 8) = (short) floor(d[i][j] +0.5); + } + } + return; +} + + + +void vp8_short_fhaar2x2_c(short *input, short *output, int pitch) //pitch = 8 +{ + /* [1 1 ; 1 -1] orthogonal transform */ + /* use position: 0,1, 4, 8 */ + int i; + short *ip1 = input; + short *op1 = output; + for (i = 0; i < 16; i++) + { + op1[i] = 0; + } + + op1[0]=ip1[0] + ip1[1] + ip1[4] + ip1[8]; + op1[1]=ip1[0] - ip1[1] + ip1[4] - ip1[8]; + op1[4]=ip1[0] + ip1[1] - ip1[4] - ip1[8]; + op1[8]=ip1[0] - ip1[1] - ip1[4] + ip1[8]; + +} void 
vp8_short_fdct4x4_c(short *input, short *output, int pitch) { int i; @@ -61,11 +176,6 @@ void vp8_short_fdct4x4_c(short *input, short *output, int pitch) } } -void vp8_short_fdct8x4_c(short *input, short *output, int pitch) -{ - vp8_short_fdct4x4_c(input, output, pitch); - vp8_short_fdct4x4_c(input + 4, output + 16, pitch); -} void vp8_short_walsh4x4_c(short *input, short *output, int pitch) { @@ -84,7 +194,7 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch) c1 = ((ip[1] - ip[3])<<2); b1 = ((ip[0] - ip[2])<<2); - op[0] = a1 + d1 + (a1!=0); + op[0] = a1 + d1+ (a1!=0); #else a1 = ((ip[0] + ip[2])); d1 = ((ip[1] + ip[3])); @@ -136,3 +246,9 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch) op++; } } + +void vp8_short_fdct8x4_c(short *input, short *output, int pitch) +{ + vp8_short_fdct4x4_c(input, output, pitch); + vp8_short_fdct4x4_c(input + 4, output + 16, pitch); +} diff --git a/vp8/encoder/dct.h b/vp8/encoder/dct.h index fec3b4c37..c37d47aca 100644 --- a/vp8/encoder/dct.h +++ b/vp8/encoder/dct.h @@ -22,6 +22,20 @@ #include "arm/dct_arm.h" #endif +#if CONFIG_T8X8 + +#ifndef vp8_fdct_short8x8 +#define vp8_fdct_short8x8 vp8_short_fdct8x8_c +#endif +extern prototype_fdct(vp8_fdct_short8x8); + +#ifndef vp8_fhaar_short2x2 +#define vp8_fhaar_short2x2 vp8_short_fhaar2x2_c +#endif +extern prototype_fdct(vp8_fhaar_short2x2); + +#endif + #ifndef vp8_fdct_short4x4 #define vp8_fdct_short4x4 vp8_short_fdct4x4_c #endif @@ -49,6 +63,10 @@ extern prototype_fdct(vp8_fdct_walsh_short4x4); typedef prototype_fdct(*vp8_fdct_fn_t); typedef struct { +#if CONFIG_T8X8 + vp8_fdct_fn_t short8x8; + vp8_fdct_fn_t haar_short2x2; +#endif vp8_fdct_fn_t short4x4; vp8_fdct_fn_t short8x4; vp8_fdct_fn_t fast4x4; diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index d7aed0a3a..b62544af5 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -26,6 +26,7 @@ #include "vp8/common/findnearmv.h" #include "vp8/common/reconintra.h" 
#include +#include #include #include "vp8/common/subpixel.h" #include "vpx_ports/vpx_timer.h" @@ -45,6 +46,11 @@ #define SEEK_DIFFID 7 #endif +#ifdef ENC_DEBUG +int enc_debug=0; +int mb_row_debug, mb_col_debug; +#endif + extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ; extern void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex); @@ -60,6 +66,8 @@ int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t); static void adjust_act_zbin( VP8_COMP *cpi, MACROBLOCK *x ); + + #ifdef MODE_STATS unsigned int inter_y_modes[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; unsigned int inter_uv_modes[4] = {0, 0, 0, 0}; @@ -88,6 +96,186 @@ static const unsigned char VP8_VAR_OFFS[16]= }; + +#if CONFIG_T8X8 + +//INTRA mode transform size +//When all three criteria are off the default is 4x4 +//#define INTRA_VARIANCE_ENTROPY_CRITERIA +#define INTRA_WTD_SSE_ENTROPY_CRITERIA +//#define INTRA_TEST_8X8_ONLY +// +//INTER mode transform size +//When all three criteria are off the default is 4x4 +//#define INTER_VARIANCE_ENTROPY_CRITERIA +#define INTER_WTD_SSE_ENTROPY_CRITERIA +//#define INTER_TEST_8X8_ONLY + +double variance_Block(short *b1, int pitch, int dimension) +{ + short ip[8][8]={{0}}; + short *b = b1; + int i, j = 0; + double mean = 0.0, variance = 0.0; + for (i = 0; i < dimension; i++) + { + for (j = 0; j < dimension; j++) + { + ip[i][j] = b[j]; + mean += ip[i][j]; + } + b += pitch; + } + mean /= (dimension*dimension); + + for (i = 0; i < dimension; i++) + { + for (j = 0; j < dimension; j++) + { + variance += (ip[i][j]-mean)*(ip[i][j]-mean); + } + } + variance /= (dimension*dimension); + return variance; +} + +double mean_Block(short *b, int pitch, int dimension) +{ + short ip[8][8]={{0}}; + int i, j = 0; + double mean = 0; + for (i = 0; i < dimension; i++) + { + for (j = 0; j < dimension; j++) + { + ip[i][j] = b[j]; + mean += ip[i][j]; + } + b += pitch; + 
} + mean /= (dimension*dimension); + + return mean; +} + +int SSE_Block(short *b, int pitch, int dimension) +{ + int i, j, sse_block = 0; + for (i = 0; i < dimension; i++) + { + for (j = 0; j < dimension; j++) + { + sse_block += b[j]*b[j]; + } + b += pitch; + } + return sse_block; +} + +double Compute_Variance_Entropy(MACROBLOCK *x) +{ + double variance_8[4] = {0.0, 0.0, 0.0, 0.0}, sum_var = 0.0, all_entropy = 0.0; + variance_8[0] = variance_Block(x->block[0].src_diff, 16, 8); + variance_8[1] = variance_Block(x->block[2].src_diff, 16, 8); + variance_8[2] = variance_Block(x->block[8].src_diff, 16, 8); + variance_8[3] = variance_Block(x->block[10].src_diff, 16, 8); + sum_var = variance_8[0] + variance_8[1] + variance_8[2] + variance_8[3]; + if(sum_var) + { + int i; + for(i = 0; i <4; i++) + { + if(variance_8[i]) + { + variance_8[i] /= sum_var; + all_entropy -= variance_8[i]*log(variance_8[i]); + } + } + } + return (all_entropy /log(2)); +} + +double Compute_Wtd_SSE_SubEntropy(MACROBLOCK *x) +{ + double variance_8[4] = {0.0, 0.0, 0.0, 0.0}; + double entropy_8[4] = {0.0, 0.0, 0.0, 0.0}; + double sse_1, sse_2, sse_3, sse_4, sse_0; + int i; + for (i=0;i<3;i+=2) + { + sse_0 = SSE_Block(x->block[i].src_diff, 16, 8); + if(sse_0) + { + sse_1 = SSE_Block(x->block[i].src_diff, 16, 4)/sse_0; + sse_2 = SSE_Block(x->block[i+1].src_diff, 16, 4)/sse_0; + sse_3 = SSE_Block(x->block[i+4].src_diff, 16, 4)/sse_0; + sse_4 = SSE_Block(x->block[i+5].src_diff, 16, 4)/sse_0; + variance_8[i]= variance_Block(x->block[i].src_diff, 16, 8); + if(sse_1 && sse_2 && sse_3 && sse_4) + entropy_8[i]= (-sse_1*log(sse_1) + -sse_2*log(sse_2) + -sse_3*log(sse_3) + -sse_4*log(sse_4))/log(2); + } + } + for (i=8;i<11;i+=2) + { + sse_0 = SSE_Block(x->block[i].src_diff, 16, 8); /* measure this 8x8 block first, as the loop above does, so the zero test guards the divisions below */ + if(sse_0) + { + sse_1 = SSE_Block(x->block[i].src_diff, 16, 4)/sse_0; + sse_2 = SSE_Block(x->block[i+1].src_diff, 16, 4)/sse_0; + sse_3 = SSE_Block(x->block[i+4].src_diff, 16, 4)/sse_0; + sse_4 = SSE_Block(x->block[i+5].src_diff, 16, 
4)/sse_0; + variance_8[i-7]= variance_Block(x->block[i].src_diff, 16, 8); + if(sse_1 && sse_2 && sse_3 && sse_4) + entropy_8[i-7]= (-sse_1*log(sse_1) + -sse_2*log(sse_2) + -sse_3*log(sse_3) + -sse_4*log(sse_4))/log(2); + } + } + if(variance_8[0]+variance_8[1]+variance_8[2]+variance_8[3]) + return (entropy_8[0]*variance_8[0]+ + entropy_8[1]*variance_8[1]+ + entropy_8[2]*variance_8[2]+ + entropy_8[3]*variance_8[3])/ + (variance_8[0]+ + variance_8[1]+ + variance_8[2]+ + variance_8[3]); + else + return 0; +} + +int vp8_8x8_selection_intra(MACROBLOCK *x) +{ +#ifdef INTRA_VARIANCE_ENTROPY_CRITERIA + return (Compute_Variance_Entropy(x) > 1.2); +#elif defined(INTRA_WTD_SSE_ENTROPY_CRITERIA) + return (Compute_Wtd_SSE_SubEntropy(x) > 1.2); +#elif defined(INTRA_TEST_8X8_ONLY) + return 1; +#else + return 0; //when all criteria are off use the default 4x4 only +#endif +} + +int vp8_8x8_selection_inter(MACROBLOCK *x) +{ +#ifdef INTER_VARIANCE_ENTROPY_CRITERIA + return (Compute_Variance_Entropy(x) > 1.5); +#elif defined(INTER_WTD_SSE_ENTROPY_CRITERIA) + return (Compute_Wtd_SSE_SubEntropy(x) > 1.5); +#elif defined(INTER_TEST_8X8_ONLY) + return 1; +#else + return 0; //when all criteria are off use the default 4x4 only +#endif +} + +#endif + // Original activity measure from Tim T's code. 
static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x ) { @@ -385,7 +573,6 @@ void encode_mb_row(VP8_COMP *cpi, int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; int map_index = (mb_row * cpi->common.mb_cols); #if CONFIG_SEGMENTATION - int left_id, above_id; int sum; #endif #if CONFIG_MULTITHREAD @@ -425,6 +612,12 @@ void encode_mb_row(VP8_COMP *cpi, // for each macroblock col in image for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { +#ifdef ENC_DEBUG + //enc_debug = (cpi->count==29 && mb_row==5 && mb_col==0); + enc_debug = (cpi->count==4 && mb_row==17 && mb_col==13); + mb_col_debug=mb_col; + mb_row_debug=mb_row; +#endif // Distance of Mb to the left & right edges, specified in // 1/8th pel units as they are always compared to values // that are in 1/8th pel units @@ -471,8 +664,9 @@ void encode_mb_row(VP8_COMP *cpi, if (xd->segmentation_enabled) { // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking) + // Reset segment_id to 0 or 1 so that the default transform mode is 4x4 if (cpi->segmentation_map[map_index+mb_col] <= 3) - xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index+mb_col]; + xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index+mb_col]&1; else xd->mode_info_context->mbmi.segment_id = 0; @@ -487,24 +681,27 @@ void encode_mb_row(VP8_COMP *cpi, if (cm->frame_type == KEY_FRAME) { *totalrate += vp8cx_encode_intra_macro_block(cpi, x, tp); + //Note the encoder may have changed the segment_id + #ifdef MODE_STATS - y_modes[xd->mbmi.mode] ++; + y_modes[xd->mode_info_context->mbmi.mode] ++; #endif } else { *totalrate += vp8cx_encode_inter_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset); + //Note the encoder may have changed the segment_id #ifdef MODE_STATS - inter_y_modes[xd->mbmi.mode] ++; + inter_y_modes[xd->mode_info_context->mbmi.mode] ++; - if (xd->mbmi.mode == SPLITMV) + if (xd->mode_info_context->mbmi.mode == SPLITMV) { int b; - for (b = 0; b < 
xd->mbmi.partition_count; b++) + for (b = 0; b < x->partition_info->count; b++) { - inter_b_modes[x->partition->bmi[b].mode] ++; + inter_b_modes[x->partition_info->bmi[b].mode] ++; } } @@ -545,10 +742,12 @@ void encode_mb_row(VP8_COMP *cpi, // Increment the activity mask pointers. x->mb_activity_ptr++; + /* Test code if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) xd->mode_info_context->mbmi.segment_id = 0; else xd->mode_info_context->mbmi.segment_id = 1; + */ /* save the block info */ for (i = 0; i < 16; i++) @@ -566,7 +765,7 @@ void encode_mb_row(VP8_COMP *cpi, //cpi->segmentation_map[mb_row * cm->mb_cols + mb_col] = xd->mbmi.segment_id; if (cm->frame_type == KEY_FRAME) { - segment_counts[xd->mode_info_context->mbmi.segment_id] ++; + segment_counts[xd->mode_info_context->mbmi.segment_id]++; } else { @@ -724,7 +923,7 @@ void vp8_encode_frame(VP8_COMP *cpi) TOKENEXTRA *tp = cpi->tok; #if CONFIG_SEGMENTATION int segment_counts[MAX_MB_SEGMENTS + SEEK_SEGID]; - int prob[3]; + int prob[3] = {255, 255, 255}; int new_cost, original_cost; #else int segment_counts[MAX_MB_SEGMENTS]; @@ -901,7 +1100,7 @@ void vp8_encode_frame(VP8_COMP *cpi) if (xd->segmentation_enabled) { int tot_count; - int i,j; + int i; int count1,count2,count3,count4; // Set to defaults @@ -1218,7 +1417,7 @@ static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x) do { - ++ bct[xd->block[b].bmi.mode]; + ++ bct[xd->block[b].bmi.as_mode]; } while (++b < 16); } @@ -1256,6 +1455,10 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) { int rate; +#if CONFIG_T8X8 + if (x->e_mbd.segmentation_enabled) + x->e_mbd.update_mb_segmentation_map = 1; +#endif if (cpi->sf.RD && cpi->compressor_speed != 2) vp8_rd_pick_intra_mode(cpi, x, &rate); else @@ -1270,14 +1473,26 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) if (x->e_mbd.mode_info_context->mbmi.mode == B_PRED) 
vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x); else + { +#if CONFIG_T8X8 + if (x->e_mbd.segmentation_enabled) + x->e_mbd.mode_info_context->mbmi.segment_id |= (vp8_8x8_selection_intra(x) << 1); +#endif vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x); - vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); - sum_intra_stats(cpi, x); - vp8_tokenize_mb(cpi, &x->e_mbd, t); - + vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); + sum_intra_stats(cpi, x); + vp8_tokenize_mb(cpi, &x->e_mbd, t); +#if CONFIG_T8X8 + if( x->e_mbd.mode_info_context->mbmi.segment_id >=2) + cpi->t8x8_count++; + else + cpi->t4x4_count++; +#endif + } return rate; } + #ifdef SPEEDSTATS extern int cnt_pm; #endif @@ -1353,7 +1568,7 @@ int vp8cx_encode_inter_macroblock cpi->last_mb_distortion = distortion; #endif - // MB level adjutment to quantizer setup + // MB level adjustment to quantizer setup if (xd->segmentation_enabled) { // If cyclic update enabled @@ -1397,16 +1612,25 @@ int vp8cx_encode_inter_macroblock cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++; +#if CONFIG_T8X8 + if (xd->segmentation_enabled) + x->e_mbd.update_mb_segmentation_map = 1; +#endif + if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { - vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); - if (xd->mode_info_context->mbmi.mode == B_PRED) { + vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x); } else { +#if CONFIG_T8X8 + if (xd->segmentation_enabled) + xd->mode_info_context->mbmi.segment_id |= (vp8_8x8_selection_intra(x) << 1); +#endif + vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x); } @@ -1415,6 +1639,10 @@ int vp8cx_encode_inter_macroblock else { int ref_fb_idx; +#if CONFIG_T8X8 + if (xd->segmentation_enabled) + xd->mode_info_context->mbmi.segment_id |= (vp8_8x8_selection_inter(x) << 1); +#endif vp8_build_uvmvs(xd, cpi->common.full_pixel); @@ -1444,9 +1672,40 @@ int vp8cx_encode_inter_macroblock 
xd->dst.y_stride, xd->dst.uv_stride); } +#if CONFIG_T8X8 + if (x->e_mbd.mode_info_context->mbmi.segment_id >=2) + cpi->t8x8_count++; + else + cpi->t4x4_count++; +#endif if (!x->skip) + { +#ifdef ENC_DEBUG + if (enc_debug) + { + int i; + printf("Segment=%d [%d, %d]: %d %d:\n", x->e_mbd.mode_info_context->mbmi.segment_id, mb_col_debug, mb_row_debug, xd->mb_to_left_edge, xd->mb_to_top_edge); + for (i =0; i<400; i++) { + printf("%3d ", xd->qcoeff[i]); + if (i%16 == 15) printf("\n"); + } + printf("\n"); + printf("eobs = "); + for (i=0;i<25;i++) + printf("%d:%d ", i, xd->block[i].eob); + printf("\n"); + fflush(stdout); + } +#endif vp8_tokenize_mb(cpi, xd, t); +#ifdef ENC_DEBUG + if (enc_debug) { + printf("Tokenized\n"); + fflush(stdout); + } +#endif + } else { if (cpi->common.mb_no_coeff_skip) diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c index 59db0253b..10afed3ec 100644 --- a/vp8/encoder/encodeintra.c +++ b/vp8/encoder/encodeintra.c @@ -22,6 +22,10 @@ #include "encodeintra.h" +#ifdef ENC_DEBUG +extern int enc_debug; +#endif + #if CONFIG_RUNTIME_CPU_DETECT #define IF_RTCD(x) (x) #else @@ -96,15 +100,67 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mby)(&x->e_mbd); ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), x->e_mbd.predictor, b->src_stride); - +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_transform_intra_mby_8x8(x); + else +#endif vp8_transform_intra_mby(x); - vp8_quantize_mby(x); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_quantize_mby_8x8(x); + else +#endif + vp8_quantize_mby(x); if (x->optimize) + { +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_optimize_mby_8x8(x, rtcd); + else +#endif vp8_optimize_mby(x, rtcd); + } - vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd); +#if CONFIG_T8X8 + 
if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_inverse_transform_mby_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + else +#endif + vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + +#ifdef ENC_DEBUG + if (enc_debug) { + int i; + printf("Intra qcoeff:\n"); + printf("%d %d:\n", x->e_mbd.mb_to_left_edge, x->e_mbd.mb_to_top_edge); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.qcoeff[i]); + if (i%16 == 15) printf("\n"); + } + printf("Intra dqcoeff:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.dqcoeff[i]); + if (i%16 == 15) printf("\n"); + } + printf("Intra diff:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.diff[i]); + if (i%16 == 15) printf("\n"); + } + printf("Intra predictor:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.predictor[i]); + if (i%16 == 15) printf("\n"); + } + printf("eobs:\n"); + for (i=0;i<25;i++) + printf("%d ", x->e_mbd.block[i].eob); + printf("\n"); + } +#endif RECON_INVOKE(&rtcd->common->recon, recon_mby) (IF_RTCD(&rtcd->common->recon), &x->e_mbd); @@ -116,14 +172,66 @@ void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_mbuv)(&x->e_mbd); ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_transform_mbuv_8x8(x); + else +#endif + vp8_transform_mbuv(x); - vp8_transform_mbuv(x); - - vp8_quantize_mbuv(x); - +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_quantize_mbuv_8x8(x); + else +#endif + vp8_quantize_mbuv(x); + +#ifdef ENC_DEBUG + if (enc_debug) { + int i; + printf("vp8_encode_intra16x16mbuv\n"); + printf("%d %d:\n", x->e_mbd.mb_to_left_edge, x->e_mbd.mb_to_top_edge); + printf("qcoeff:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.qcoeff[i]); + if (i%16 == 15) printf("\n"); + } + 
printf("dqcoeff:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.dqcoeff[i]); + if (i%16 == 15) printf("\n"); + } + printf("diff:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.diff[i]); + if (i%16 == 15) printf("\n"); + } + printf("predictor:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.predictor[i]); + if (i%16 == 15) printf("\n"); + } + printf("eobs:\n"); + for (i=0;i<25;i++) + printf("%d ", x->e_mbd.block[i].eob); + printf("\n"); + } +#endif if (x->optimize) + { +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_optimize_mbuv_8x8(x, rtcd); + else +#endif vp8_optimize_mbuv(x, rtcd); + } +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_inverse_transform_mbuv_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + else +#endif vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd); vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd); diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c index eb89bba0a..00a19bd88 100644 --- a/vp8/encoder/encodemb.c +++ b/vp8/encoder/encodemb.c @@ -26,6 +26,11 @@ #else #define IF_RTCD(x) NULL #endif + +#ifdef ENC_DEBUG +extern int enc_debug; +#endif + void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) { unsigned char *src_ptr = (*(be->base_src) + be->src); @@ -117,6 +122,19 @@ static void build_dcblock(MACROBLOCK *x) src_diff_ptr[i] = x->coeff[i * 16]; } } +void vp8_build_dcblock_8x8(MACROBLOCK *x) +{ + short *src_diff_ptr = &x->src_diff[384]; + int i; + for (i = 0; i < 16; i++) + { + src_diff_ptr[i] = 0; + } + src_diff_ptr[0] = x->coeff[0 * 16]; + src_diff_ptr[1] = x->coeff[4 * 16]; + src_diff_ptr[4] = x->coeff[8 * 16]; + src_diff_ptr[8] = x->coeff[12 * 16]; +} void vp8_transform_mbuv(MACROBLOCK *x) { @@ -197,10 +215,104 @@ static void transform_mby(MACROBLOCK *x) } } +#if CONFIG_T8X8 +void vp8_transform_mbuv_8x8(MACROBLOCK *x) +{ + int i; -#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) + for (i = 16; i < 24; 
i += 4) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 16); + } +} + + +void vp8_transform_intra_mby_8x8(MACROBLOCK *x)//changed +{ + int i; + for (i = 0; i < 9; i += 8) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 32); + } + for (i = 2; i < 11; i += 8) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i+2].coeff[0], 32); + } + // build dc block from 16 y dc values + vp8_build_dcblock_8x8(x); + //vp8_build_dcblock(x); + + // do 2nd order transform on the dc block + x->short_fhaar2x2(&x->block[24].src_diff[0], + &x->block[24].coeff[0], 8); + +} + + +void vp8_transform_mb_8x8(MACROBLOCK *x) +{ + int i; + + for (i = 0; i < 9; i += 8) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 32); + } + for (i = 2; i < 11; i += 8) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i+2].coeff[0], 32); + } + // build dc block from 16 y dc values + if (x->e_mbd.mode_info_context->mbmi.mode != B_PRED &&x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) + vp8_build_dcblock_8x8(x); + //vp8_build_dcblock(x); + + for (i = 16; i < 24; i += 4) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 16); + } + + // do 2nd order transform on the dc block + if (x->e_mbd.mode_info_context->mbmi.mode != B_PRED &&x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) + x->short_fhaar2x2(&x->block[24].src_diff[0], + &x->block[24].coeff[0], 8); +} + +void vp8_transform_mby_8x8(MACROBLOCK *x) +{ + int i; + + for (i = 0; i < 9; i += 8) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 32); + } + for (i = 2; i < 11; i += 8) + { + x->vp8_short_fdct8x8(&x->block[i].src_diff[0], + &x->block[i+2].coeff[0], 32); + } + // build dc block from 16 y dc values + if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) + { + //vp8_build_dcblock(x); + vp8_build_dcblock_8x8(x); + x->short_fhaar2x2(&x->block[24].src_diff[0], + &x->block[24].coeff[0], 
8); + } +} + +#endif + +#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) +#define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) typedef struct vp8_token_state vp8_token_state; struct vp8_token_state{ @@ -581,27 +693,554 @@ void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) } } +#if CONFIG_T8X8 +void optimize_b_8x8(MACROBLOCK *mb, int i, int type, + ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, + ENTROPY_CONTEXT *a1, ENTROPY_CONTEXT *l1, + const VP8_ENCODER_RTCD *rtcd) +{ + BLOCK *b; + BLOCKD *d; + vp8_token_state tokens[65][2]; + unsigned best_mask[2]; + const short *dequant_ptr; + const short *coeff_ptr; + short *qcoeff_ptr; + short *dqcoeff_ptr; + int eob; + int i0; + int rc; + int x; + int sz = 0; + int next; + int rdmult; + int rddiv; + int final_eob; + int rd_cost0; + int rd_cost1; + int rate0; + int rate1; + int error0; + int error1; + int t0; + int t1; + int best; + int band; + int pt; + + b = &mb->block[i]; + d = &mb->e_mbd.block[i]; + + /* Enable this to test the effect of RDO as a replacement for the dynamic + * zero bin instead of an augmentation of it. + */ +#if 0 + vp8_strict_quantize_b(b, d); +#endif + + dequant_ptr = d->dequant; + coeff_ptr = b->coeff; + qcoeff_ptr = d->qcoeff; + dqcoeff_ptr = d->dqcoeff; + i0 = !type; + eob = d->eob; + + /* Now set up a Viterbi trellis to evaluate alternative roundings. */ + /* TODO: These should vary with the block type, since the quantizer does. */ + rdmult = mb->rdmult << 2; + rddiv = mb->rddiv; + best_mask[0] = best_mask[1] = 0; + /* Initialize the sentinel node of the trellis. */ + tokens[eob][0].rate = 0; + tokens[eob][0].error = 0; + tokens[eob][0].next = 64; + tokens[eob][0].token = DCT_EOB_TOKEN; + tokens[eob][0].qc = 0; + *(tokens[eob] + 1) = *(tokens[eob] + 0); + next = eob; + for (i = eob; i-- > i0;) + { + int base_bits; + int d2; + int dx; + + rc = vp8_default_zig_zag1d_8x8[i]; + x = qcoeff_ptr[rc]; + /* Only add a trellis state for non-zero coefficients. 
*/ + if (x) + { + int shortcut=0; + error0 = tokens[next][0].error; + error1 = tokens[next][1].error; + /* Evaluate the first possibility for this state. */ + rate0 = tokens[next][0].rate; + rate1 = tokens[next][1].rate; + t0 = (vp8_dct_value_tokens_ptr + x)->Token; + /* Consider both possible successor states. */ + if (next < 64) + { + band = vp8_coef_bands_8x8[i + 1]; + pt = vp8_prev_token_class[t0]; + rate0 += + mb->token_costs[type][band][pt][tokens[next][0].token]; + rate1 += + mb->token_costs[type][band][pt][tokens[next][1].token]; + } + rd_cost0 = RDCOST_8x8(rdmult, rddiv, rate0, error0); + rd_cost1 = RDCOST_8x8(rdmult, rddiv, rate1, error1); + if (rd_cost0 == rd_cost1) + { + rd_cost0 = RDTRUNC_8x8(rdmult, rddiv, rate0, error0); + rd_cost1 = RDTRUNC_8x8(rdmult, rddiv, rate1, error1); + } + /* And pick the best. */ + best = rd_cost1 < rd_cost0; + base_bits = *(vp8_dct_value_cost_ptr + x); + dx = dqcoeff_ptr[rc] - coeff_ptr[rc]; + d2 = dx*dx; + tokens[i][0].rate = base_bits + (best ? rate1 : rate0); + tokens[i][0].error = d2 + (best ? error1 : error0); + tokens[i][0].next = next; + tokens[i][0].token = t0; + tokens[i][0].qc = x; + best_mask[0] |= best << i; + /* Evaluate the second possibility for this state. 
*/ + rate0 = tokens[next][0].rate; + rate1 = tokens[next][1].rate; + + if((abs(x)*dequant_ptr[rc!=0]>abs(coeff_ptr[rc])) && + (abs(x)*dequant_ptr[rc!=0]<abs(coeff_ptr[rc])+dequant_ptr[rc!=0])) + shortcut = 1; + else + shortcut = 0; + + if(shortcut) + { + sz = -(x < 0); + x -= 2*sz + 1; + } + + /* Consider both possible successor states. */ + if (!x) + { + /* If we reduced this coefficient to zero, check to see if + * we need to move the EOB back here. + */ + t0 = tokens[next][0].token == DCT_EOB_TOKEN ? + DCT_EOB_TOKEN : ZERO_TOKEN; + t1 = tokens[next][1].token == DCT_EOB_TOKEN ? + DCT_EOB_TOKEN : ZERO_TOKEN; + } + else + { + t0=t1 = (vp8_dct_value_tokens_ptr + x)->Token; + } + if (next < 64) + { + band = vp8_coef_bands_8x8[i + 1]; + if(t0!=DCT_EOB_TOKEN) + { + pt = vp8_prev_token_class[t0]; + rate0 += mb->token_costs[type][band][pt][ + tokens[next][0].token]; + } + if(t1!=DCT_EOB_TOKEN) + { + pt = vp8_prev_token_class[t1]; + rate1 += mb->token_costs[type][band][pt][ + tokens[next][1].token]; + } + } + + rd_cost0 = RDCOST_8x8(rdmult, rddiv, rate0, error0); + rd_cost1 = RDCOST_8x8(rdmult, rddiv, rate1, error1); + if (rd_cost0 == rd_cost1) + { + rd_cost0 = RDTRUNC_8x8(rdmult, rddiv, rate0, error0); + rd_cost1 = RDTRUNC_8x8(rdmult, rddiv, rate1, error1); + } + /* And pick the best. */ + best = rd_cost1 < rd_cost0; + base_bits = *(vp8_dct_value_cost_ptr + x); + + if(shortcut) + { + dx -= (dequant_ptr[rc!=0] + sz) ^ sz; + d2 = dx*dx; + } + tokens[i][1].rate = base_bits + (best ? rate1 : rate0); + tokens[i][1].error = d2 + (best ? error1 : error0); + tokens[i][1].next = next; + tokens[i][1].token =best?t1:t0; + tokens[i][1].qc = x; + best_mask[1] |= best << i; + /* Finally, make this the new head of the trellis. */ + next = i; + } + /* There's no choice to make for a zero coefficient, so we don't + * add a new trellis node, but we do need to update the costs. + */ + else + { + band = vp8_coef_bands_8x8[i + 1]; + t0 = tokens[next][0].token; + t1 = tokens[next][1].token; + /* Update the cost of each path if we're past the EOB token. */ + if (t0 != DCT_EOB_TOKEN) + { + tokens[next][0].rate += mb->token_costs[type][band][0][t0]; + tokens[next][0].token = ZERO_TOKEN; + } + if (t1 != DCT_EOB_TOKEN) + { + tokens[next][1].rate += mb->token_costs[type][band][0][t1]; + tokens[next][1].token = ZERO_TOKEN; + } + /* Don't update next, because we didn't add a new node. */ + } + } + + /* Now pick the best path through the whole trellis. 
*/ + band = vp8_coef_bands_8x8[i + 1]; + VP8_COMBINEENTROPYCONTEXTS_8x8(pt, *a, *l, *a1, *l1); + rate0 = tokens[next][0].rate; + rate1 = tokens[next][1].rate; + error0 = tokens[next][0].error; + error1 = tokens[next][1].error; + t0 = tokens[next][0].token; + t1 = tokens[next][1].token; + rate0 += mb->token_costs[type][band][pt][t0]; + rate1 += mb->token_costs[type][band][pt][t1]; + rd_cost0 = RDCOST_8x8(rdmult, rddiv, rate0, error0); + rd_cost1 = RDCOST_8x8(rdmult, rddiv, rate1, error1); + if (rd_cost0 == rd_cost1) + { + rd_cost0 = RDTRUNC_8x8(rdmult, rddiv, rate0, error0); + rd_cost1 = RDTRUNC_8x8(rdmult, rddiv, rate1, error1); + } + best = rd_cost1 < rd_cost0; + final_eob = i0 - 1; + for (i = next; i < eob; i = next) + { + x = tokens[i][best].qc; + if (x) + final_eob = i; + rc = vp8_default_zig_zag1d_8x8[i]; + qcoeff_ptr[rc] = x; + dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0]; + next = tokens[i][best].next; + best = (best_mask[best] >> i) & 1; + } + final_eob++; + + d->eob = final_eob; + *a = *l = (d->eob != !type); + +} + +void optimize_mb_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) +{ + int b; + int type; + int has_2nd_order; + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta; + ENTROPY_CONTEXT *tl; + + vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; + + has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED + && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); + type = has_2nd_order ? 
0 : 3; + + for (b = 0; b < 16; b+=4) + { + optimize_b_8x8(x, b, type, + ta + vp8_block2above[b], tl + vp8_block2left[b], + ta + vp8_block2above[b+1], tl + vp8_block2left[b+4], + rtcd); + + if(b==0) + { + *(ta + vp8_block2above[1]) = *(ta + vp8_block2above[4]) = *(ta + vp8_block2above[5]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[1]) = *(tl + vp8_block2left[4]) = *(tl + vp8_block2left[5]) = *(tl + vp8_block2left[b]); + } + else if(b==4) + { + *(ta + vp8_block2above[2]) = *(ta + vp8_block2above[3]) = *(ta + vp8_block2above[6]) = *(ta + vp8_block2above[7]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[2]) = *(tl + vp8_block2left[3]) = *(tl + vp8_block2left[6]) = *(tl + vp8_block2left[7]) = *(tl + vp8_block2left[b]); + *(ta + vp8_block2above[4]) = *(ta + vp8_block2above[1]); + *(tl + vp8_block2left[4]) = *(tl + vp8_block2left[1]); + } + else if(b==8) + { + *(ta + vp8_block2above[9]) = *(ta + vp8_block2above[12]) = *(ta + vp8_block2above[13]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[9]) = *(tl + vp8_block2left[12]) = *(tl + vp8_block2left[13]) = *(tl + vp8_block2left[b]); + + } + else if(b==12) + { + *(ta + vp8_block2above[10]) = *(ta + vp8_block2above[11]) = *(ta + vp8_block2above[14]) = *(ta + vp8_block2above[15]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[10]) = *(tl + vp8_block2left[11]) = *(tl + vp8_block2left[14]) = *(tl + vp8_block2left[15]) = *(tl + vp8_block2left[b]); + *(ta + vp8_block2above[12]) = *(ta + vp8_block2above[8]); + *(tl + vp8_block2left[12]) = *(tl + vp8_block2left[8]); + + } + + + + } + + for (b = 16; b < 20; b+=4) + { + optimize_b_8x8(x, b, PLANE_TYPE_UV, //vp8_block2type[b], + ta + vp8_block2above[b], tl + vp8_block2left[b], + ta + vp8_block2above[b+1], tl + vp8_block2left[b+2], + rtcd); + *(ta + vp8_block2above[b+1]) = *(ta + vp8_block2above[b+2]) = *(ta + vp8_block2above[b+3]) = + *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[b+1]) = *(tl + vp8_block2left[b+2]) = *(tl + vp8_block2left[b+3]) = 
+ *(tl + vp8_block2left[b]); + + } + + for (b = 20; b < 24; b+=4) + { + optimize_b_8x8(x, b, PLANE_TYPE_UV, //vp8_block2type[b], + ta + vp8_block2above[b], tl + vp8_block2left[b], + ta + vp8_block2above[b+1], tl + vp8_block2left[b+2], + rtcd); + *(ta + vp8_block2above[b+1]) = *(ta + vp8_block2above[b+2]) = *(ta + vp8_block2above[b+3]) = + *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[b+1]) = *(tl + vp8_block2left[b+2]) = *(tl + vp8_block2left[b+3]) = + *(tl + vp8_block2left[b]); + + } + + + /* + if (has_2nd_order) + { + vp8_setup_temp_context(&t, x->e_mbd.above_context[Y2CONTEXT], + x->e_mbd.left_context[Y2CONTEXT], 1); + optimize_b(x, 24, 1, t.a, t.l, rtcd); + } + */ +} + +void vp8_optimize_mby_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) +{ + int b; + int type; + int has_2nd_order; + + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta; + ENTROPY_CONTEXT *tl; + + if (!x->e_mbd.above_context) + return; + + if (!x->e_mbd.left_context) + return; + + vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; + + has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED + && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); + type = has_2nd_order ? 
0 : 3; + + for (b = 0; b < 16; b+=4) + { + optimize_b_8x8(x, b, type, + ta + vp8_block2above[b], tl + vp8_block2left[b], + ta + vp8_block2above[b+1], tl + vp8_block2left[b+4], + rtcd); + if(b==0) + { + *(ta + vp8_block2above[1]) = *(ta + vp8_block2above[4]) = *(ta + vp8_block2above[5]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[1]) = *(tl + vp8_block2left[4]) = *(tl + vp8_block2left[5]) = *(tl + vp8_block2left[b]); + } + else if(b==4) + { + *(ta + vp8_block2above[2]) = *(ta + vp8_block2above[3]) = *(ta + vp8_block2above[6]) = *(ta + vp8_block2above[7]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[2]) = *(tl + vp8_block2left[3]) = *(tl + vp8_block2left[6]) = *(tl + vp8_block2left[7]) = *(tl + vp8_block2left[b]); + *(ta + vp8_block2above[4]) = *(ta + vp8_block2above[1]); + *(tl + vp8_block2left[4]) = *(tl + vp8_block2left[1]); + } + else if(b==8) + { + *(ta + vp8_block2above[9]) = *(ta + vp8_block2above[12]) = *(ta + vp8_block2above[13]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[9]) = *(tl + vp8_block2left[12]) = *(tl + vp8_block2left[13]) = *(tl + vp8_block2left[b]); + + } + else if(b==12) + { + *(ta + vp8_block2above[10]) = *(ta + vp8_block2above[11]) = *(ta + vp8_block2above[14]) = *(ta + vp8_block2above[15]) = *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[10]) = *(tl + vp8_block2left[11]) = *(tl + vp8_block2left[14]) = *(tl + vp8_block2left[15]) = *(tl + vp8_block2left[b]); + *(ta + vp8_block2above[12]) = *(ta + vp8_block2above[8]); + *(tl + vp8_block2left[12]) = *(tl + vp8_block2left[8]); + + } + + + } + + /* + if (has_2nd_order) + { + vp8_setup_temp_context(&t, x->e_mbd.above_context[Y2CONTEXT], + x->e_mbd.left_context[Y2CONTEXT], 1); + optimize_b(x, 24, 1, t.a, t.l, rtcd); + } + */ +} + +void vp8_optimize_mbuv_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) +{ + int b; + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta; + ENTROPY_CONTEXT *tl; + + if (!x->e_mbd.above_context) + return; + + if 
(!x->e_mbd.left_context) + return; + + vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; + + for (b = 16; b < 20; b+=4) + { + optimize_b_8x8(x, b, PLANE_TYPE_UV, //vp8_block2type[b], + ta + vp8_block2above[b], tl + vp8_block2left[b], + ta + vp8_block2above[b+1], tl + vp8_block2left[b+2], + rtcd); + *(ta + vp8_block2above[b+1]) = *(ta + vp8_block2above[b+2]) = *(ta + vp8_block2above[b+3]) = + *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[b+1]) = *(tl + vp8_block2left[b+2]) = *(tl + vp8_block2left[b+3]) = + *(tl + vp8_block2left[b]); + + } + + for (b = 20; b < 24; b+=4) + { + optimize_b_8x8(x, b, PLANE_TYPE_UV, //vp8_block2type[b], + ta + vp8_block2above[b], tl + vp8_block2left[b], + ta + vp8_block2above[b+1], tl + vp8_block2left[b+2], + rtcd); + *(ta + vp8_block2above[b+1]) = *(ta + vp8_block2above[b+2]) = *(ta + vp8_block2above[b+3]) = + *(ta + vp8_block2above[b]); + *(tl + vp8_block2left[b+1]) = *(tl + vp8_block2left[b+2]) = *(tl + vp8_block2left[b+3]) = + *(tl + vp8_block2left[b]); + + } + +} +#endif + void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { vp8_build_inter_predictors_mb(&x->e_mbd); vp8_subtract_mb(rtcd, x); - transform_mb(x); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_transform_mb_8x8(x); + else +#endif + transform_mb(x); - vp8_quantize_mb(x); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_quantize_mb_8x8(x); + else +#endif + vp8_quantize_mb(x); if (x->optimize) + { +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + optimize_mb_8x8(x, rtcd); + else +#endif optimize_mb(x, rtcd); + } - vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + 
vp8_inverse_transform_mb_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + else +#endif + vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) { +#ifdef ENC_DEBUG + if (enc_debug) + { + int i; + printf("qcoeff:\n"); + printf("%d %d:\n", x->e_mbd.mb_to_left_edge, x->e_mbd.mb_to_top_edge); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.qcoeff[i]); + if (i%16 == 15) printf("\n"); + } + printf("dqcoeff:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.dqcoeff[i]); + if (i%16 == 15) printf("\n"); + } + printf("diff:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.diff[i]); + if (i%16 == 15) printf("\n"); + } + printf("predictor:\n"); + for (i =0; i<400; i++) { + printf("%3d ", x->e_mbd.predictor[i]); + if (i%16 == 15) printf("\n"); + } + printf("\n"); + } +#endif + } RECON_INVOKE(&rtcd->common->recon, recon_mb) (IF_RTCD(&rtcd->common->recon), &x->e_mbd); +#ifdef ENC_DEBUG + if (enc_debug) { + int i, j, k; + printf("Final Reconstruction\n"); + for (i =0; i<16; i+=4) { + BLOCKD *b = &x->e_mbd.block[i]; + unsigned char *d = *(b->base_dst) + b->dst; + for (k=0; k<4; k++) { + for (j=0; j<16; j++) + printf("%3d ", d[j]); + printf("\n"); + d+=b->dst_stride; + } + } + } +#endif } -/* this funciton is used by first pass only */ +/* this function is used by first pass only */ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { BLOCK *b = &x->block[0]; @@ -610,22 +1249,34 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), x->e_mbd.predictor, b->src_stride); - transform_mby(x); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_transform_mby_8x8(x); + else +#endif + transform_mby(x); vp8_quantize_mby(x); - - vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd); +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + 
vp8_inverse_transform_mby_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd); + else +#endif + vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd); RECON_INVOKE(&rtcd->common->recon, recon_mby) (IF_RTCD(&rtcd->common->recon), &x->e_mbd); } - void vp8_encode_inter16x16uvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) { vp8_build_inter_predictors_mbuv(&x->e_mbd); ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride); - +#if CONFIG_T8X8 + if(x->e_mbd.mode_info_context->mbmi.segment_id >= 2) + vp8_transform_mbuv_8x8(x); + else +#endif vp8_transform_mbuv(x); vp8_quantize_mbuv(x); diff --git a/vp8/encoder/encodemb.h b/vp8/encoder/encodemb.h index 47fc72dad..73f1ad223 100644 --- a/vp8/encoder/encodemb.h +++ b/vp8/encoder/encodemb.h @@ -103,4 +103,16 @@ void vp8_encode_inter16x16uvrd(const struct VP8_ENCODER_RTCD *rtcd, MACROBLOCK * void vp8_optimize_mby(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd); void vp8_optimize_mbuv(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd); void vp8_encode_inter16x16y(const struct VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x); + +#if CONFIG_T8X8 +void vp8_transform_mb_8x8(MACROBLOCK *mb); +void vp8_transform_mbuv_8x8(MACROBLOCK *x); +void vp8_transform_intra_mby_8x8(MACROBLOCK *x); +void vp8_build_dcblock_8x8(MACROBLOCK *b); +void vp8_optimize_mby_8x8(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd); +void vp8_optimize_mbuv_8x8(MACROBLOCK *x, const struct VP8_ENCODER_RTCD *rtcd); +#endif + + + #endif diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c index 1a37f03b9..6b9dff8d9 100644 --- a/vp8/encoder/ethreading.c +++ b/vp8/encoder/ethreading.c @@ -24,6 +24,14 @@ extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x); extern void vp8_build_block_offsets(MACROBLOCK *x); extern void vp8_setup_block_ptrs(MACROBLOCK *x); +#ifdef MODE_STATS +extern unsigned int inter_y_modes[10]; +extern unsigned int inter_uv_modes[4]; +extern unsigned 
int inter_b_modes[15]; +extern unsigned int y_modes[5]; +extern unsigned int uv_modes[4]; +extern unsigned int b_modes[14]; +#endif extern void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm); static THREAD_FUNCTION loopfilter_thread(void *p_data) @@ -175,7 +183,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) { *totalrate += vp8cx_encode_intra_macro_block(cpi, x, &tp); #ifdef MODE_STATS - y_modes[xd->mbmi.mode] ++; + y_modes[xd->mode_info_context->mbmi.mode] ++; #endif } else @@ -183,15 +191,15 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) *totalrate += vp8cx_encode_inter_macroblock(cpi, x, &tp, recon_yoffset, recon_uvoffset); #ifdef MODE_STATS - inter_y_modes[xd->mbmi.mode] ++; + inter_y_modes[xd->mode_info_context->mbmi.mode] ++; - if (xd->mbmi.mode == SPLITMV) + if (xd->mode_info_context->mbmi.mode == SPLITMV) { int b; - for (b = 0; b < xd->mbmi.partition_count; b++) + for (b = 0; b < x->partition_info->count; b++) { - inter_b_modes[x->partition->bmi[b].mode] ++; + inter_b_modes[x->partition_info->bmi[b].mode] ++; } } diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c index 1ec3a9894..8fe82aea4 100644 --- a/vp8/encoder/generic/csystemdependent.c +++ b/vp8/encoder/generic/csystemdependent.c @@ -69,6 +69,10 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c; +#if CONFIG_T8X8 + cpi->rtcd.fdct.short8x8 = vp8_short_fdct8x8_c; + cpi->rtcd.fdct.haar_short2x2 = vp8_short_fhaar2x2_c; +#endif cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c; cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c; cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_c; @@ -86,6 +90,12 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) cpi->rtcd.quantize.quantb_pair = vp8_regular_quantize_b_pair; cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c; cpi->rtcd.quantize.fastquantb_pair = vp8_fast_quantize_b_pair_c; +#if CONFIG_T8X8 + cpi->rtcd.quantize.quantb_8x8 = vp8_regular_quantize_b_8x8; + 
cpi->rtcd.quantize.fastquantb_8x8 = vp8_fast_quantize_b_8x8_c; + cpi->rtcd.quantize.quantb_2x2 = vp8_regular_quantize_b_2x2; + cpi->rtcd.quantize.fastquantb_2x2 = vp8_fast_quantize_b_2x2_c; +#endif cpi->rtcd.search.full_search = vp8_full_search_sad; cpi->rtcd.search.refining_search = vp8_refining_search_sad; cpi->rtcd.search.diamond_search = vp8_diamond_search_sad; diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index d603ef532..c857704fc 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -46,6 +46,8 @@ #define RTCD(x) NULL #endif +#define OUTPUT_YUV_REC + extern void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi); extern void vp8cx_set_alt_lf_level(VP8_COMP *cpi, int filt_val); extern void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi); @@ -105,10 +107,13 @@ extern double vp8_calc_ssimg #ifdef OUTPUT_YUV_SRC FILE *yuv_file; #endif +#ifdef OUTPUT_YUV_REC +FILE *yuv_rec_file; +#endif #if 0 FILE *framepsnr; -FILE *kf_list; +FILE ikf_list; FILE *keyfile; #endif @@ -130,15 +135,20 @@ unsigned int tot_ef = 0; unsigned int cnt_ef = 0; #endif +#if defined(SECTIONBITS_OUTPUT) +extern unsigned __int64 Sectionbits[500]; +#endif #ifdef MODE_STATS -extern unsigned __int64 Sectionbits[50]; extern int y_modes[5] ; extern int uv_modes[4] ; extern int b_modes[10] ; - extern int inter_y_modes[10] ; extern int inter_uv_modes[4] ; extern unsigned int inter_b_modes[15]; +#if CONFIG_SEGMENTATION +extern int segment_modes_intra[MAX_MB_SEGMENTS]; +extern int segment_modes_inter[MAX_MB_SEGMENTS]; +#endif #endif extern void (*vp8_short_fdct4x4)(short *input, short *output, int pitch); @@ -309,7 +319,11 @@ extern FILE *vpxlogc; static void setup_features(VP8_COMP *cpi) { // Set up default state for MB feature flags +#if CONFIG_SEGMENTATION + cpi->mb.e_mbd.segmentation_enabled = 1; +#else cpi->mb.e_mbd.segmentation_enabled = 0; +#endif cpi->mb.e_mbd.update_mb_segmentation_map = 0; cpi->mb.e_mbd.update_mb_segmentation_data = 0; 
vpx_memset(cpi->mb.e_mbd.mb_segment_tree_probs, 255, sizeof(cpi->mb.e_mbd.mb_segment_tree_probs)); @@ -1231,16 +1245,25 @@ void vp8_set_speed_features(VP8_COMP *cpi) if (cpi->sf.improved_dct) { +#if CONFIG_T8X8 + cpi->mb.vp8_short_fdct8x8 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x8); +#endif cpi->mb.vp8_short_fdct8x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x4); cpi->mb.vp8_short_fdct4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short4x4); } else { +#if CONFIG_T8X8 + cpi->mb.vp8_short_fdct8x8 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x8); +#endif cpi->mb.vp8_short_fdct8x4 = FDCT_INVOKE(&cpi->rtcd.fdct, fast8x4); cpi->mb.vp8_short_fdct4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, fast4x4); } cpi->mb.short_walsh4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, walsh_short4x4); +#if CONFIG_T8X8 + cpi->mb.short_fhaar2x2 = FDCT_INVOKE(&cpi->rtcd.fdct, haar_short2x2); +#endif if (cpi->sf.improved_quant) { @@ -1248,6 +1271,10 @@ void vp8_set_speed_features(VP8_COMP *cpi) quantb); cpi->mb.quantize_b_pair = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb_pair); +#if CONFIG_T8X8 + cpi->mb.quantize_b_8x8 = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb_8x8); + cpi->mb.quantize_b_2x2 = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb_2x2); +#endif } else { @@ -1255,6 +1282,10 @@ void vp8_set_speed_features(VP8_COMP *cpi) fastquantb); cpi->mb.quantize_b_pair = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb_pair); +#if CONFIG_T8X8 + cpi->mb.quantize_b_8x8 = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb_8x8); + cpi->mb.quantize_b_2x2 = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb_2x2); +#endif } if (cpi->sf.improved_quant != last_improved_quant) vp8cx_init_quantizer(cpi); @@ -2037,6 +2068,9 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) #ifdef OUTPUT_YUV_SRC yuv_file = fopen("bd.yuv", "ab"); #endif +#ifdef OUTPUT_YUV_REC + yuv_rec_file = fopen("rec.yuv", "wb"); +#endif #if 0 framepsnr = fopen("framepsnr.stt", "a"); @@ -2250,8 +2284,8 @@ void vp8_remove_compressor(VP8_PTR *ptr) #ifdef MODE_STATS { extern int count_mb_seg[4]; - FILE 
*f = fopen("modes.stt", "a"); - double dr = (double)cpi->oxcf.frame_rate * (double)bytes * (double)8 / (double)count / (double)1000 ; + FILE *f = fopen("modes.stt", "w"); + double dr = (double)cpi->oxcf.frame_rate * (double)cpi->bytes * (double)8 / (double)cpi->count / (double)1000 ; fprintf(f, "intra_mode in Intra Frames:\n"); fprintf(f, "Y: %8d, %8d, %8d, %8d, %8d\n", y_modes[0], y_modes[1], y_modes[2], y_modes[3], y_modes[4]); fprintf(f, "UV:%8d, %8d, %8d, %8d\n", uv_modes[0], uv_modes[1], uv_modes[2], uv_modes[3]); @@ -2265,6 +2299,9 @@ void vp8_remove_compressor(VP8_PTR *ptr) fprintf(f, "\n"); } +#if CONFIG_SEGMENTATION + fprintf(f, "Segments:%8d, %8d, %8d, %8d\n", segment_modes_intra[0], segment_modes_intra[1], segment_modes_intra[2], segment_modes_intra[3]); +#endif fprintf(f, "Modes in Inter Frames:\n"); fprintf(f, "Y: %8d, %8d, %8d, %8d, %8d, %8d, %8d, %8d, %8d, %8d\n", @@ -2284,8 +2321,9 @@ void vp8_remove_compressor(VP8_PTR *ptr) fprintf(f, "P:%8d, %8d, %8d, %8d\n", count_mb_seg[0], count_mb_seg[1], count_mb_seg[2], count_mb_seg[3]); fprintf(f, "PB:%8d, %8d, %8d, %8d\n", inter_b_modes[LEFT4X4], inter_b_modes[ABOVE4X4], inter_b_modes[ZERO4X4], inter_b_modes[NEW4X4]); - - +#if CONFIG_SEGMENTATION + fprintf(f, "Segments:%8d, %8d, %8d, %8d\n", segment_modes_inter[0], segment_modes_inter[1], segment_modes_inter[2], segment_modes_inter[3]); +#endif fclose(f); } #endif @@ -2373,6 +2411,9 @@ void vp8_remove_compressor(VP8_PTR *ptr) #ifdef OUTPUT_YUV_SRC fclose(yuv_file); #endif +#ifdef OUTPUT_YUV_REC + fclose(yuv_rec_file); +#endif #if 0 @@ -2583,10 +2624,9 @@ int vp8_update_entropy(VP8_PTR comp, int update) } -#if OUTPUT_YUV_SRC -void vp8_write_yuv_frame(const char *name, YV12_BUFFER_CONFIG *s) +#ifdef OUTPUT_YUV_SRC +void vp8_write_yuv_frame(YV12_BUFFER_CONFIG *s) { - FILE *yuv_file = fopen(name, "ab"); unsigned char *src = s->y_buffer; int h = s->y_height; @@ -2616,8 +2656,42 @@ void vp8_write_yuv_frame(const char *name, YV12_BUFFER_CONFIG *s) src += 
s->uv_stride; } while (--h); +} +#endif - fclose(yuv_file); +#ifdef OUTPUT_YUV_REC +void vp8_write_yuv_rec_frame(VP8_COMMON *cm) +{ + YV12_BUFFER_CONFIG *s = cm->frame_to_show; + unsigned char *src = s->y_buffer; + int h = cm->Height; + + do + { + fwrite(src, s->y_width, 1, yuv_rec_file); + src += s->y_stride; + } + while (--h); + + src = s->u_buffer; + h = (cm->Height+1)/2; + + do + { + fwrite(src, s->uv_width, 1, yuv_rec_file); + src += s->uv_stride; + } + while (--h); + + src = s->v_buffer; + h = (cm->Height+1)/2; + + do + { + fwrite(src, s->uv_width, 1, yuv_rec_file); + src += s->uv_stride; + } + while (--h); } #endif @@ -4643,14 +4717,8 @@ static void encode_frame_to_data_rate fclose(recon_file); } #endif -#if 0 - // DEBUG - if(cm->current_video_frame>173 && cm->current_video_frame<178) - { - char filename[512]; - sprintf(filename, "enc%04d.yuv", (int) cm->current_video_frame); - vp8_write_yuv_frame(filename, cm->frame_to_show); - } +#ifdef OUTPUT_YUV_REC + vp8_write_yuv_rec_frame(cm); #endif } @@ -5005,7 +5073,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon } else #endif - encode_frame_to_data_rate(cpi, size, dest, frame_flags); + encode_frame_to_data_rate(cpi, size, dest, frame_flags); if (cpi->compressor_speed == 2) { diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index bf3889404..81e2ebcac 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -32,12 +32,15 @@ #include "lookahead.h" //#define SPEEDSTATS 1 +#define MODE_STATS 1 +//#define ENC_DEBUG + #define MIN_GF_INTERVAL 4 #define DEFAULT_GF_INTERVAL 7 #define KEY_FRAME_CONTEXT 5 -#define MAX_LAG_BUFFERS (CONFIG_REALTIME_ONLY? 1 : 25) +#define MAX_LAG_BUFFERS (CONFIG_REALTIME_ONLY? 
1 : 25)//1:25 #define AF_THRESH 25 #define AF_THRESH2 100 @@ -411,6 +414,11 @@ typedef struct VP8_COMP //save vp8_tree_probs_from_distribution result for each frame to avoid repeat calculation vp8_prob frame_coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; unsigned int frame_branch_ct [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; +#if CONFIG_T8X8 + unsigned int coef_counts_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; /* for this frame */ + vp8_prob frame_coef_probs_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES]; + unsigned int frame_branch_ct_8x8 [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; +#endif int gfu_boost; int kf_boost; @@ -469,6 +477,8 @@ typedef struct VP8_COMP int gf_update_recommended; int skip_true_count; int skip_false_count; + int t4x4_count; + int t8x8_count; unsigned char *segmentation_map; signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; // Segment data (can be deltas or absolute values) @@ -636,7 +646,7 @@ int rd_cost_intra_mb(MACROBLOCKD *x); void vp8_tokenize_mb(VP8_COMP *, MACROBLOCKD *, TOKENEXTRA **); void vp8_set_speed_features(VP8_COMP *cpi); - +extern void vp8_write_yuv_frame(const char *name, YV12_BUFFER_CONFIG *s); #if CONFIG_DEBUG #define CHECK_MEM_ERROR(lval,expr) do {\ lval = (expr); \ diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c index 503d24123..f109135b7 100644 --- a/vp8/encoder/quantize.c +++ b/vp8/encoder/quantize.c @@ -16,6 +16,10 @@ #include "quantize.h" #include "vp8/common/quant_common.h" +#ifdef ENC_DEBUG +extern int enc_debug; +#endif + #define EXACT_QUANT #ifdef EXACT_FASTQUANT @@ -78,6 +82,9 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) short *dqcoeff_ptr = d->dqcoeff; short *dequant_ptr = d->dequant; + vpx_memset(qcoeff_ptr, 0, 32); + vpx_memset(dqcoeff_ptr, 0, 32); + eob = -1; for (i = 0; i < 16; i++) { @@ -267,7 +274,8 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD 
*d) d->eob = eob + 1; } -#endif +#endif //EXACT_QUANT + void vp8_quantize_mby_c(MACROBLOCK *x) { @@ -301,6 +309,592 @@ void vp8_quantize_mbuv_c(MACROBLOCK *x) x->quantize_b(&x->block[i], &x->e_mbd.block[i]); } +#if CONFIG_T8X8 + +#ifdef EXACT_FASTQUANT +void vp8_fast_quantize_b_2x2_c(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *quant_shift_ptr = b->quant_shift; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + //double q2nd = 4; + + + vpx_memset(qcoeff_ptr, 0, 32); + vpx_memset(dqcoeff_ptr, 0, 32); + + eob = -1; + + for (i = 0; i < 4; i++) + { + rc = vp8_default_zig_zag1d[i]; + z = coeff_ptr[rc]; + //zbin = zbin_ptr[rc]/q2nd ; + zbin = zbin_ptr[rc] ; + + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //x += (round_ptr[rc]/q2nd); + x += (round_ptr[rc]); + //y = ((int)((int)(x * quant_ptr[rc] * q2nd) >> 16) + x) + // >> quant_shift_ptr[rc]; // quantize (x) + y = ((int)((int)(x * quant_ptr[rc]) >> 16) + x) + >> quant_shift_ptr[rc]; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + } + } + } + d->eob = eob + 1; +} + +void vp8_fast_quantize_b_8x8_c(BLOCK *b, BLOCKD *d)// only ac and dc difference, no difference among ac +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *quant_shift_ptr = b->quant_shift; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + //double q1st = 2; + vpx_memset(qcoeff_ptr, 0, 64*sizeof(short)); + vpx_memset(dqcoeff_ptr, 0, 64*sizeof(short)); + + eob 
= -1; + + for (i = 0; i < 64; i++) + { + rc = vp8_default_zig_zag1d_8x8[i]; + z = coeff_ptr[rc]; + //zbin = zbin_ptr[rc!=0]/q1st ; + zbin = zbin_ptr[rc!=0] ; + + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //x += round_ptr[rc]/q1st; + //y = ((int)(((int)((x * quant_ptr[rc!=0] * q1st)) >> 16) + x)) + // >> quant_shift_ptr[rc!=0]; // quantize (x) + x += round_ptr[rc]; + y = ((int)(((int)((x * quant_ptr[rc!=0])) >> 16) + x)) + >> quant_shift_ptr[rc!=0]; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + //dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0] / q1st; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + } + } + } + d->eob = eob + 1; +} + +#else + +void vp8_fast_quantize_b_2x2_c(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + //double q2nd = 4; + vpx_memset(qcoeff_ptr, 0, 32); + vpx_memset(dqcoeff_ptr, 0, 32); + + eob = -1; + + for (i = 0; i < 4; i++) + { + rc = vp8_default_zig_zag1d[i]; + z = coeff_ptr[rc]; + //zbin = zbin_ptr[rc]/q2nd; + zbin = zbin_ptr[rc]; + + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //y = ((int)((x + round_ptr[rc]/q2nd) * quant_ptr[rc] * q2nd)) >> 16; // quantize (x) + y = ((int)((x + round_ptr[rc]) * quant_ptr[rc])) >> 16; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + //dqcoeff_ptr[rc] = x * dequant_ptr[rc] / q2nd; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + } + } + } + d->eob = eob + 1; + //if (d->eob > 4) printf("Flag 
Fast 2 (%d)\n", d->eob); +} + +void vp8_fast_quantize_b_8x8_c(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + //double q1st = 2; + vpx_memset(qcoeff_ptr, 0, 64*sizeof(short)); + vpx_memset(dqcoeff_ptr, 0, 64*sizeof(short)); + + eob = -1; + + for (i = 0; i < 64; i++) + { + + rc = vp8_default_zig_zag1d_8x8[i]; + z = coeff_ptr[rc]; + //zbin = zbin_ptr[rc!=0]/q1st ; + zbin = zbin_ptr[rc!=0] ; + + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //y = ((int)((x + round_ptr[rc!=0] / q1st) * quant_ptr[rc!=0] * q1st)) >> 16; + y = ((int)((x + round_ptr[rc!=0]) * quant_ptr[rc!=0])) >> 16; + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + //dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0] / q1st; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0]; // dequantized value + if (y) + { + eob = i; // last nonzero coeffs + } + } + } + d->eob = eob + 1; +} + +#endif //EXACT_FASTQUANT + +#ifdef EXACT_QUANT +void vp8_regular_quantize_b_2x2(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *zbin_boost_ptr = b->zrun_zbin_boost; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + unsigned char *quant_shift_ptr = b->quant_shift; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + short zbin_oq_value = b->zbin_extra; + //double q2nd = 4; + vpx_memset(qcoeff_ptr, 0, 32); + vpx_memset(dqcoeff_ptr, 0, 32); + + eob = -1; + + for (i = 0; i < 4; i++) + { + rc = vp8_default_zig_zag1d[i]; + z = coeff_ptr[rc]; + + //zbin = (zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value)/q2nd; + zbin = (zbin_ptr[rc] + 
*zbin_boost_ptr + zbin_oq_value); + + zbin_boost_ptr ++; + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //x += (round_ptr[rc]/q2nd); + x += (round_ptr[rc]); + y = ((int)((int)(x * quant_ptr[rc]) >> 16) + x) + >> quant_shift_ptr[rc]; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + //dqcoeff_ptr[rc] = x * dequant_ptr[rc]/q2nd; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + zbin_boost_ptr = &b->zrun_zbin_boost[0]; // reset zero runlength + } + } + } + + d->eob = eob + 1; +} + +void vp8_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *zbin_boost_ptr = b->zrun_zbin_boost; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + unsigned char *quant_shift_ptr = b->quant_shift; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + short zbin_oq_value = b->zbin_extra; + //double q1st = 2; + + vpx_memset(qcoeff_ptr, 0, 64*sizeof(short)); + vpx_memset(dqcoeff_ptr, 0, 64*sizeof(short)); + + eob = -1; + + for (i = 0; i < 64; i++) + { + + rc = vp8_default_zig_zag1d_8x8[i]; + z = coeff_ptr[rc]; + + //zbin = (zbin_ptr[rc!=0] + *zbin_boost_ptr + zbin_oq_value)/q1st; + zbin = (zbin_ptr[rc!=0] + *zbin_boost_ptr + zbin_oq_value); + + zbin_boost_ptr ++; + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //x += (round_ptr[rc!=0]/q1st); + //y = ((int)(((int)(x * quant_ptr[rc!=0] * q1st) >> 16) + x)) + // >> quant_shift_ptr[rc!=0]; // quantize (x) + x += (round_ptr[rc!=0]); + y = ((int)(((int)(x * quant_ptr[rc!=0]) >> 16) + x)) + >> quant_shift_ptr[rc!=0]; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + //dqcoeff_ptr[rc] = x * 
dequant_ptr[rc!=0] / q1st; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + zbin_boost_ptr = &b->zrun_zbin_boost[0]; // reset zero runlength + } + } + } + + d->eob = eob + 1; +} + +void vp8_strict_quantize_b_2x2(BLOCK *b, BLOCKD *d) +{ + int i; + int rc; + int eob; + int x; + int y; + int z; + int sz; + short *coeff_ptr; + short *quant_ptr; + unsigned char *quant_shift_ptr; + short *qcoeff_ptr; + short *dqcoeff_ptr; + short *dequant_ptr; + //double q2nd = 4; + coeff_ptr = b->coeff; + quant_ptr = b->quant; + quant_shift_ptr = b->quant_shift; + qcoeff_ptr = d->qcoeff; + dqcoeff_ptr = d->dqcoeff; + dequant_ptr = d->dequant; + eob = - 1; + vpx_memset(qcoeff_ptr, 0, 32); + vpx_memset(dqcoeff_ptr, 0, 32); + for (i = 0; i < 4; i++) + { + int dq; + int round; + + /*TODO: These arrays should be stored in zig-zag order.*/ + rc = vp8_default_zig_zag1d[i]; + z = coeff_ptr[rc]; + //z = z * q2nd; + //dq = dequant_ptr[rc]/q2nd; + dq = dequant_ptr[rc]; + round = dq >> 1; + /* Sign of z. */ + sz = -(z < 0); + x = (z + sz) ^ sz; + x += round; + if (x >= dq) + { + /* Quantize x */ + y = (((x * quant_ptr[rc]) >> 16) + x) >> quant_shift_ptr[rc]; + /* Put the sign back. */ + x = (y + sz) ^ sz; + /* Save * the * coefficient and its dequantized value. */ + qcoeff_ptr[rc] = x; + dqcoeff_ptr[rc] = x * dq; + /* Remember the last non-zero coefficient. 
*/ + if (y) + eob = i; + } + } + + d->eob = eob + 1; +} + +void vp8_strict_quantize_b_8x8(BLOCK *b, BLOCKD *d) +{ + int i; + int rc; + int eob; + int x; + int y; + int z; + int sz; + short *coeff_ptr; + short *quant_ptr; + unsigned char *quant_shift_ptr; + short *qcoeff_ptr; + short *dqcoeff_ptr; + short *dequant_ptr; + //double q1st = 2; + printf("call strict quantizer\n"); + coeff_ptr = b->coeff; + quant_ptr = b->quant; + quant_shift_ptr = b->quant_shift; + qcoeff_ptr = d->qcoeff; + dqcoeff_ptr = d->dqcoeff; + dequant_ptr = d->dequant; + eob = - 1; + vpx_memset(qcoeff_ptr, 0, 64*sizeof(short)); + vpx_memset(dqcoeff_ptr, 0, 64*sizeof(short)); + for (i = 0; i < 64; i++) + { + int dq; + int round; + + /*TODO: These arrays should be stored in zig-zag order.*/ + rc = vp8_default_zig_zag1d_8x8[i]; + z = coeff_ptr[rc]; + //z = z * q1st; + //dq = dequant_ptr[rc!=0]/q1st; + dq = dequant_ptr[rc!=0]; + round = dq >> 1; + /* Sign of z. */ + sz = -(z < 0); + x = (z + sz) ^ sz; + x += round; + if (x >= dq) + { + /* Quantize x. */ + y = ((int)(((int)((x * quant_ptr[rc!=0])) >> 16) + x)) >> quant_shift_ptr[rc!=0]; + /* Put the sign back. */ + x = (y + sz) ^ sz; + /* Save the coefficient and its dequantized value. * */ + qcoeff_ptr[rc] = x; + dqcoeff_ptr[rc] = x * dq; + /* Remember the last non-zero coefficient. 
*/ + if (y) + eob = i; + } + } + d->eob = eob + 1; +} + +#else + +void vp8_regular_quantize_b_2x2(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *zbin_boost_ptr = b->zrun_zbin_boost; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + short zbin_oq_value = b->zbin_extra; + //double q2nd = 4; + vpx_memset(qcoeff_ptr, 0, 32); + vpx_memset(dqcoeff_ptr, 0, 32); + + eob = -1; + for (i = 0; i < 4; i++) + { + rc = vp8_default_zig_zag1d[i]; + z = coeff_ptr[rc]; + //zbin = (zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value)/q2nd; + zbin = (zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value); + zbin_boost_ptr ++; + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //y = (((x + round_ptr[rc]/q2nd) * quant_ptr[rc]*q2nd)) >> 16; // quantize (x) + y = (((x + round_ptr[rc]) * quant_ptr[rc])) >> 16; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + //dqcoeff_ptr[rc] = x * dequant_ptr[rc]/q2nd; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + zbin_boost_ptr = &b->zrun_zbin_boost[0]; // reset zero runlength + } + } + } + + d->eob = eob + 1; +} + +void vp8_regular_quantize_b_8x8(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *zbin_boost_ptr = b->zrun_zbin_boost; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + short zbin_oq_value = b->zbin_extra; + //double q1st = 2; + vpx_memset(qcoeff_ptr, 0, 64*sizeof(short)); + vpx_memset(dqcoeff_ptr, 0, 64*sizeof(short)); + + eob = -1; + for (i = 0; i < 64; i++) 
+ { + + rc = vp8_default_zig_zag1d_8x8[i]; + z = coeff_ptr[rc]; + //zbin = (zbin_ptr[rc!=0] + *zbin_boost_ptr + zbin_oq_value)/q1st; + zbin = (zbin_ptr[rc!=0] + *zbin_boost_ptr + zbin_oq_value); + zbin_boost_ptr ++; + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + //y = ((x + round_ptr[rc!=0]/q1st) * quant_ptr[rc!=0] * q1st) >> 16; + y = ((x + round_ptr[rc!=0]) * quant_ptr[rc!=0]) >> 16; + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + //dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0]/q1st; // dequantized value + dqcoeff_ptr[rc] = x * dequant_ptr[rc!=0]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + zbin_boost_ptr = &b->zrun_zbin_boost[0]; // reset zero runlength + } + } + } + d->eob = eob + 1; +} + +#endif //EXACT_QUANT + +void vp8_quantize_mby_8x8(MACROBLOCK *x) +{ + int i; + int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED + && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); + for(i = 0; i < 16; i ++) + { + x->e_mbd.block[i].eob = 0; + } + x->e_mbd.block[24].eob = 0; + for (i = 0; i < 16; i+=4) + x->quantize_b_8x8(&x->block[i], &x->e_mbd.block[i]); + + if (has_2nd_order) + x->quantize_b_2x2(&x->block[24], &x->e_mbd.block[24]); + +} + +void vp8_quantize_mb_8x8(MACROBLOCK *x) +{ + int i; + int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED + && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); + for(i = 0; i < 25; i ++) + { + x->e_mbd.block[i].eob = 0; + } + for (i = 0; i < 24; i+=4) + x->quantize_b_8x8(&x->block[i], &x->e_mbd.block[i]); + + if (has_2nd_order) + x->quantize_b_2x2(&x->block[24], &x->e_mbd.block[24]); +} + +void vp8_quantize_mbuv_8x8(MACROBLOCK *x) +{ + int i; + + for(i = 16; i < 24; i ++) + { + x->e_mbd.block[i].eob = 0; + } + for (i = 16; i < 24; i+=4) + x->quantize_b_8x8(&x->block[i], &x->e_mbd.block[i]); +} + +#endif //CONFIG_T8X8 + /* quantize_b_pair function pointer in MACROBLOCK structure is set to one of 
* these two C functions if corresponding optimized routine is not available. * NEON optimized version implements currently the fast quantization for pair @@ -731,4 +1325,3 @@ void vp8_set_quantizer(struct VP8_COMP *cpi, int Q) vp8cx_init_quantizer(cpi); } - diff --git a/vp8/encoder/quantize.h b/vp8/encoder/quantize.h index f1f0156d8..a6a6078b0 100644 --- a/vp8/encoder/quantize.h +++ b/vp8/encoder/quantize.h @@ -46,6 +46,27 @@ extern prototype_quantize_block_pair(vp8_quantize_quantb_pair); #endif extern prototype_quantize_block(vp8_quantize_fastquantb); +#ifndef vp8_quantize_quantb_8x8 +#define vp8_quantize_quantb_8x8 vp8_regular_quantize_b_8x8 +#endif +extern prototype_quantize_block(vp8_quantize_quantb_8x8); + +#ifndef vp8_quantize_fastquantb_8x8 +#define vp8_quantize_fastquantb_8x8 vp8_fast_quantize_b_8x8_c +#endif +extern prototype_quantize_block(vp8_quantize_fastquantb_8x8); + +#ifndef vp8_quantize_quantb_2x2 +#define vp8_quantize_quantb_2x2 vp8_regular_quantize_b_2x2 +#endif +extern prototype_quantize_block(vp8_quantize_quantb_2x2); + +#ifndef vp8_quantize_fastquantb_2x2 +#define vp8_quantize_fastquantb_2x2 vp8_fast_quantize_b_2x2_c +#endif +extern prototype_quantize_block(vp8_quantize_fastquantb_2x2); + + #ifndef vp8_quantize_fastquantb_pair #define vp8_quantize_fastquantb_pair vp8_fast_quantize_b_pair_c #endif @@ -56,6 +77,10 @@ typedef struct prototype_quantize_block(*quantb); prototype_quantize_block_pair(*quantb_pair); prototype_quantize_block(*fastquantb); + prototype_quantize_block(*quantb_8x8); + prototype_quantize_block(*fastquantb_8x8); + prototype_quantize_block(*quantb_2x2); + prototype_quantize_block(*fastquantb_2x2); prototype_quantize_block_pair(*fastquantb_pair); } vp8_quantize_rtcd_vtable_t; @@ -81,6 +106,8 @@ extern prototype_quantize_mb(vp8_quantize_mby); #endif extern void vp8_strict_quantize_b(BLOCK *b,BLOCKD *d); +extern void vp8_strict_quantize_b_8x8(BLOCK *b,BLOCKD *d); +extern void vp8_strict_quantize_b_2x2(BLOCK *b,BLOCKD *d); struct 
VP8_COMP; extern void vp8_set_quantizer(struct VP8_COMP *cpi, int Q); diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 8f1862f73..5e61271a4 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -251,8 +251,6 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int QIndex) { int q; int i; - int *thresh; - int threshmult; vp8_clear_system_state(); //__asm emms; @@ -267,7 +265,6 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int QIndex) if (cpi->zbin_over_quant > 0) { double oq_factor; - double modq; // Experimental code using the same basic equation as used for Q above // The units of cpi->zbin_over_quant are 1/128 of Q bin size diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h index 95134cb81..ea04cbf25 100644 --- a/vp8/encoder/rdopt.h +++ b/vp8/encoder/rdopt.h @@ -13,6 +13,7 @@ #define __INC_RDOPT_H #define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) ) +#define RDCOST_8x8(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) ) extern void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue); extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra); diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c index 15e7336b1..62581b87f 100644 --- a/vp8/encoder/tokenize.c +++ b/vp8/encoder/tokenize.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include "onyx_int.h" @@ -22,18 +23,27 @@ #ifdef ENTROPY_STATS _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#if CONFIG_T8X8 +_int64 context_counters_8x8[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +#endif #endif void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ; +#if CONFIG_T8X8 +void vp8_stuff_mb_8x8(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ; +#endif void vp8_fix_contexts(MACROBLOCKD *x); static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE*2]; const TOKENVALUE *vp8_dct_value_tokens_ptr; 
static int dct_value_cost[DCT_MAX_VALUE*2]; const int *vp8_dct_value_cost_ptr; -#if 0 -int skip_true_count = 0; -int skip_false_count = 0; + +#ifdef ENC_DEBUG +extern int mb_row_debug; +extern int mb_col_debug; +extern int enc_debug; #endif + static void fill_value_tokens() { @@ -93,6 +103,69 @@ static void fill_value_tokens() vp8_dct_value_cost_ptr = dct_value_cost + DCT_MAX_VALUE; } +#if CONFIG_T8X8 +static void tokenize2nd_order_b_8x8 +( + const BLOCKD *const b, + TOKENEXTRA **tp, + const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ + const FRAME_TYPE frametype, + ENTROPY_CONTEXT *a, + ENTROPY_CONTEXT *l, + VP8_COMP *cpi +) +{ + int pt; /* near block/prev token context index */ + int c = 0; /* start at DC */ + const int eob = b->eob; /* one beyond last nonzero coeff */ + TOKENEXTRA *t = *tp; /* store tokens starting here */ + int x; + const short *qcoeff_ptr = b->qcoeff; + VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); + + assert(eob<=4); + + do + { + const int band = vp8_coef_bands[c]; + + if (c < eob) + { + int rc = vp8_default_zig_zag1d[c]; + const int v = qcoeff_ptr[rc]; + + assert(-DCT_MAX_VALUE <= v && v < (DCT_MAX_VALUE)); + + t->Extra = vp8_dct_value_tokens_ptr[v].Extra; + x = vp8_dct_value_tokens_ptr[v].Token; + } + else + x = DCT_EOB_TOKEN; + + t->Token = x; + //printf("Token : %d\n", x); + t->context_tree = cpi->common.fc.coef_probs_8x8 [type] [band] [pt]; + + t->skip_eob_node = pt == 0 && ((band > 0 && type > 0) || (band > 1 && type == 0)); + +#ifdef ENC_DEBUG + if (t->skip_eob_node && vp8_coef_encodings[x].Len==1) + printf("Trouble 2 x=%d Len=%d skip=%d eob=%d c=%d band=%d type=%d: [%d %d %d]\n", + x, vp8_coef_encodings[x].Len, t->skip_eob_node, eob, c, band, type, + cpi->count, mb_row_debug, mb_col_debug); +#endif + + ++cpi->coef_counts_8x8 [type] [band] [pt] [x]; + } + while (pt = vp8_prev_token_class[x], ++t, c < eob && ++c < 4); + + *tp = t; + pt = (c != !type); /* 0 <-> all coeff data is zero */ + *a = *l = pt; + +} +#endif + static 
void tokenize2nd_order_b ( MACROBLOCKD *x, @@ -153,6 +226,66 @@ static void tokenize2nd_order_b *a = *l = pt; } +#if CONFIG_T8X8 +static void tokenize1st_order_b_8x8 +( + const BLOCKD *const b, + TOKENEXTRA **tp, + const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ + const FRAME_TYPE frametype, + ENTROPY_CONTEXT *a, + ENTROPY_CONTEXT *l, + ENTROPY_CONTEXT *a1, + ENTROPY_CONTEXT *l1, + VP8_COMP *cpi +) +{ + int pt; /* near block/prev token context index */ + int c = type ? 0 : 1; /* start at DC unless type 0 */ + const int eob = b->eob; /* one beyond last nonzero coeff */ + TOKENEXTRA *t = *tp; /* store tokens starting here */ + int x; + const short *qcoeff_ptr = b->qcoeff; + VP8_COMBINEENTROPYCONTEXTS_8x8(pt, *a, *l, *a1, *l1); + + do + { + const int band = vp8_coef_bands_8x8[c]; + + x = DCT_EOB_TOKEN; + + if (c < eob) + { + int rc = vp8_default_zig_zag1d_8x8[c]; + const int v = qcoeff_ptr[rc]; + + assert(-DCT_MAX_VALUE <= v && v < (DCT_MAX_VALUE)); + + t->Extra = vp8_dct_value_tokens_ptr[v].Extra; + x = vp8_dct_value_tokens_ptr[v].Token; + } + + t->Token = x; + t->context_tree = cpi->common.fc.coef_probs_8x8 [type] [band] [pt]; + + t->skip_eob_node = pt == 0 && ((band > 0 && type > 0) || (band > 1 && type == 0)); + +#ifdef ENC_DEBUG + if (t->skip_eob_node && vp8_coef_encodings[x].Len==1) + printf("Trouble 1 x=%d Len=%d skip=%d eob=%d c=%d band=%d type=%d: [%d %d %d]\n", x, vp8_coef_encodings[x].Len, t->skip_eob_node, eob, c, band, type, cpi->count, mb_row_debug, mb_col_debug); +#endif + + ++cpi->coef_counts_8x8 [type] [band] [pt] [x]; + } + while (pt = vp8_prev_token_class[x], ++t, c < eob && ++c < 64); + + *tp = t; + pt = (c != !type); /* 0 <-> all coeff data is zero */ + *a = *l = pt; +} + +#endif + static void tokenize1st_order_b ( @@ -293,22 +426,59 @@ static int mb_is_skippable(MACROBLOCKD *x, int has_y2_block) return skip; } +#if CONFIG_T8X8 +static int mb_is_skippable_8x8(MACROBLOCKD *x) +{ + int has_y2_block; + int skip = 1; + int i = 0; 
+ + has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED + && x->mode_info_context->mbmi.mode != SPLITMV); + if (has_y2_block) + { + for (i = 0; i < 16; i+=4) + skip &= (x->block[i].eob < 2); + } + + for (; i < 24 + has_y2_block; i+=4) + skip &= (!x->block[i].eob); + + return skip; +} +#endif void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) { int plane_type; int has_y2_block; + int b; has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV); - x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable(x, has_y2_block); + x->mode_info_context->mbmi.mb_skip_coeff = +#if CONFIG_T8X8 + (x->mode_info_context->mbmi.segment_id >= 2 ? + mb_is_skippable_8x8(x) : + mb_is_skippable(x, has_y2_block)); +#else + mb_is_skippable(x, has_y2_block); +#endif + if (x->mode_info_context->mbmi.mb_skip_coeff) { cpi->skip_true_count++; if (!cpi->common.mb_no_coeff_skip) - vp8_stuff_mb(cpi, x, t) ; + { +#if CONFIG_T8X8 + if (x->mode_info_context->mbmi.segment_id >= 2) + vp8_stuff_mb_8x8(cpi, x, t) ; + else +#endif + vp8_stuff_mb(cpi, x, t) ; + } else { vp8_fix_contexts(x); @@ -322,13 +492,82 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) plane_type = 3; if(has_y2_block) { - tokenize2nd_order_b(x, t, cpi); - plane_type = 0; +#if CONFIG_T8X8 + if (x->mode_info_context->mbmi.segment_id >= 2) + { + ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context; + ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context; + tokenize2nd_order_b_8x8(x->block + 24, t, 1, x->frame_type, + A + vp8_block2above[24], L + vp8_block2left[24], cpi); + } + else +#endif + tokenize2nd_order_b(x, t, cpi); + + plane_type = 0; } +#if CONFIG_T8X8 + if (x->mode_info_context->mbmi.segment_id >= 2) + { + ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context; + ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context; + for (b = 0; b < 16; b+=4) + { + tokenize1st_order_b_8x8(x->block + b, t, plane_type, x->frame_type, + A + 
vp8_block2above[b], + L + vp8_block2left[b], + A + vp8_block2above[b+1], + L + vp8_block2left[b+4], + cpi); + /* *(A + vp8_block2above[b+1]) = *(A + vp8_block2above[b+2]) = *(A + vp8_block2above[b+3]) = + *(A + vp8_block2above[b]); + *(L + vp8_block2left[b+1]) = *(L + vp8_block2left[b+2]) = *(L + vp8_block2left[b+3]) = + *(L + vp8_block2left[b]);*/ + // build coeff context for 8x8 transform + if(b==0) + { + *(A + vp8_block2above[1]) = *(A + vp8_block2above[4]) = *(A + vp8_block2above[5]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[1]) = *(L + vp8_block2left[4]) = *(L + vp8_block2left[5]) = *(L + vp8_block2left[b]); + } + else if(b==4) + { + *(A + vp8_block2above[2]) = *(A + vp8_block2above[3]) = *(A + vp8_block2above[6]) = *(A + vp8_block2above[7]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[2]) = *(L + vp8_block2left[3]) = *(L + vp8_block2left[6]) = *(L + vp8_block2left[7]) = *(L + vp8_block2left[b]); + *(A + vp8_block2above[4]) = *(A + vp8_block2above[1]); + *(L + vp8_block2left[4]) = *(L + vp8_block2left[1]); + } + else if(b==8) + { + *(A + vp8_block2above[9]) = *(A + vp8_block2above[12]) = *(A + vp8_block2above[13]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[9]) = *(L + vp8_block2left[12]) = *(L + vp8_block2left[13]) = *(L + vp8_block2left[b]); + } + else if(b==12) + { + *(A + vp8_block2above[10]) = *(A + vp8_block2above[11]) = *(A + vp8_block2above[14]) = *(A + vp8_block2above[15]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[10]) = *(L + vp8_block2left[11]) = *(L + vp8_block2left[14]) = *(L + vp8_block2left[15]) = *(L + vp8_block2left[b]); + *(A + vp8_block2above[12]) = *(A + vp8_block2above[8]); + *(L + vp8_block2left[12]) = *(L + vp8_block2left[8]); + } - tokenize1st_order_b(x, t, plane_type, cpi); + } + for (b = 16; b < 24; b+=4) { + tokenize1st_order_b_8x8(x->block + b, t, 2, x->frame_type, + A + vp8_block2above[b], + L + vp8_block2left[b], + A + vp8_block2above[b+1], + L + vp8_block2left[b+2], + cpi); + *(A + 
vp8_block2above[b+1]) = *(A + vp8_block2above[b+2]) = *(A + vp8_block2above[b+3]) = + *(A + vp8_block2above[b]); + *(L + vp8_block2left[b+1]) = *(L + vp8_block2left[b+2]) = *(L + vp8_block2left[b+3]) = + *(L + vp8_block2left[b]); + } + } + else +#endif + tokenize1st_order_b(x, t, plane_type, cpi); } @@ -337,6 +576,9 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) void init_context_counters(void) { vpx_memset(context_counters, 0, sizeof(context_counters)); +#if CONFIG_T8X8 + vpx_memset(context_counters_8x8, 0, sizeof(context_counters_8x8)); +#endif } void print_context_counters() @@ -356,6 +598,56 @@ void print_context_counters() # define Comma( X) (X? ",":"") + + type = 0; + + do + { + fprintf(f, "%s\n { /* block Type %d */", Comma(type), type); + + band = 0; + + do + { + fprintf(f, "%s\n { /* Coeff Band %d */", Comma(band), band); + + pt = 0; + + do + { + fprintf(f, "%s\n {", Comma(pt)); + + t = 0; + + do + { + const _int64 x = context_counters [type] [band] [pt] [t]; + const int y = (int) x; + + assert(x == (INT64) y); /* no overflow handling yet */ + fprintf(f, "%s %d", Comma(t), y); + + } + while (++t < MAX_ENTROPY_TOKENS); + + fprintf(f, "}"); + } + while (++pt < PREV_COEF_CONTEXTS); + + fprintf(f, "\n }"); + + } + while (++band < COEF_BANDS); + + fprintf(f, "\n }"); + } + while (++type < BLOCK_TYPES); + +#if CONFIG_T8X8 + fprintf(f, "int Contexts_8x8[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS];\n\n"); + + fprintf(f, "const int default_contexts_8x8[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS] = {"); + type = 0; do @@ -399,6 +691,7 @@ void print_context_counters() fprintf(f, "\n }"); } while (++type < BLOCK_TYPES); +#endif fprintf(f, "\n};\n"); fclose(f); @@ -411,6 +704,188 @@ void vp8_tokenize_initialize() fill_value_tokens(); } +#if CONFIG_T8X8 +static __inline void stuff2nd_order_b_8x8 +( + const BLOCKD *const b, + TOKENEXTRA **tp, + const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 
3=Y with DC */ + const FRAME_TYPE frametype, + ENTROPY_CONTEXT *a, + ENTROPY_CONTEXT *l, + VP8_COMP *cpi +) +{ + int pt; /* near block/prev token context index */ + TOKENEXTRA *t = *tp; /* store tokens starting here */ + VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); + (void) frametype; + (void) type; + (void) b; + + t->Token = DCT_EOB_TOKEN; + t->context_tree = cpi->common.fc.coef_probs_8x8 [1] [0] [pt]; + //t->section = 11; + t->skip_eob_node = 0; + ++cpi->coef_counts_8x8 [1] [0] [pt] [DCT_EOB_TOKEN]; + ++t; + + *tp = t; + pt = 0; + *a = *l = pt; + +} + +static __inline void stuff1st_order_b_8x8 +( + const BLOCKD *const b, + TOKENEXTRA **tp, + const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ + const FRAME_TYPE frametype, + ENTROPY_CONTEXT *a, + ENTROPY_CONTEXT *l, + ENTROPY_CONTEXT *a1, + ENTROPY_CONTEXT *l1, + VP8_COMP *cpi +) +{ + int pt; /* near block/prev token context index */ + TOKENEXTRA *t = *tp; /* store tokens starting here */ + VP8_COMBINEENTROPYCONTEXTS_8x8(pt, *a, *l, *a1, *l1); + (void) frametype; + (void) type; + (void) b; + + t->Token = DCT_EOB_TOKEN; + t->context_tree = cpi->common.fc.coef_probs_8x8 [0] [1] [pt]; + //t->section = 8; + t->skip_eob_node = 0; + ++cpi->coef_counts_8x8 [0] [1] [pt] [DCT_EOB_TOKEN]; + ++t; + *tp = t; + pt = 0; /* 0 <-> all coeff data is zero */ + *a = *l = pt; + + +} + +static __inline +void stuff1st_order_buv_8x8 +( + const BLOCKD *const b, + TOKENEXTRA **tp, + const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ + const FRAME_TYPE frametype, + ENTROPY_CONTEXT *a, + ENTROPY_CONTEXT *l, + ENTROPY_CONTEXT *a1, + ENTROPY_CONTEXT *l1, + VP8_COMP *cpi +) +{ + int pt; /* near block/prev token context index */ + TOKENEXTRA *t = *tp; /* store tokens starting here */ + VP8_COMBINEENTROPYCONTEXTS_8x8(pt, *a, *l, *a1, *l1); + (void) frametype; + (void) type; + (void) b; + + t->Token = DCT_EOB_TOKEN; + t->context_tree = cpi->common.fc.coef_probs_8x8 [2] [0] [pt]; + //t->section = 13; + 
t->skip_eob_node = 0; + ++cpi->coef_counts_8x8[2] [0] [pt] [DCT_EOB_TOKEN]; + ++t; + *tp = t; + pt = 0; /* 0 <-> all coeff data is zero */ + *a = *l = pt; + +} + +void vp8_stuff_mb_8x8(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) +{ + ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context; + ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context; + int plane_type; + int b; + + stuff2nd_order_b_8x8(x->block + 24, t, 1, x->frame_type, + A + vp8_block2above[24], L + vp8_block2left[24], cpi); + plane_type = 0; + + for (b = 0; b < 16; b+=4) { + stuff1st_order_b_8x8(x->block + b, t, plane_type, x->frame_type, + A + vp8_block2above[b], + L + vp8_block2left[b], + A + vp8_block2above[b+1], + L + vp8_block2left[b+4], + cpi); + // build coeff context for 8x8 transform + if(b==0) + { + *(A + vp8_block2above[1]) = *(A + vp8_block2above[4]) = *(A + vp8_block2above[5]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[1]) = *(L + vp8_block2left[4]) = *(L + vp8_block2left[5]) = *(L + vp8_block2left[b]); + } + else if(b==4) + { + *(A + vp8_block2above[2]) = *(A + vp8_block2above[3]) = *(A + vp8_block2above[6]) = *(A + vp8_block2above[7]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[2]) = *(L + vp8_block2left[3]) = *(L + vp8_block2left[6]) = *(L + vp8_block2left[7]) = *(L + vp8_block2left[b]); + *(A + vp8_block2above[4]) = *(A + vp8_block2above[1]); + *(L + vp8_block2left[4]) = *(L + vp8_block2left[1]); + } + else if(b==8) + { + *(A + vp8_block2above[9]) = *(A + vp8_block2above[12]) = *(A + vp8_block2above[13]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[9]) = *(L + vp8_block2left[12]) = *(L + vp8_block2left[13]) = *(L + vp8_block2left[b]); + + } + else if(b==12) + { + *(A + vp8_block2above[10]) = *(A + vp8_block2above[11]) = *(A + vp8_block2above[14]) = *(A + vp8_block2above[15]) = *(A + vp8_block2above[b]); + *(L + vp8_block2left[10]) = *(L + vp8_block2left[11]) = *(L + vp8_block2left[14]) = *(L + vp8_block2left[15]) = *(L + vp8_block2left[b]); + *(A + 
vp8_block2above[12]) = *(A + vp8_block2above[8]); + *(L + vp8_block2left[12]) = *(L + vp8_block2left[8]); + + } + + } + /* + for (b = 0; b < 16; b+=4) { + stuff1st_order_b_8x8(x->block + b, t, plane_type, x->frame_type, + A + vp8_block2above[b], + L + vp8_block2left[b], cpi); + *(A + vp8_block2above[b+1]) = *(A + vp8_block2above[b+2]) = *(A + vp8_block2above[b+3]) = + *(A + vp8_block2above[b]); + *(L + vp8_block2left[b+1]) = *(L + vp8_block2left[b+2]) = *(L + vp8_block2left[b+3]) = + *(L + vp8_block2left[b]); + } + */ + + for (b = 16; b < 24; b+=4) { + stuff1st_order_buv_8x8(x->block + b, t, 2, x->frame_type, + A + vp8_block2above[b], + L + vp8_block2left[b], + A + vp8_block2above[b+1], + L + vp8_block2left[b+2], + cpi); + *(A + vp8_block2above[b+1]) = *(A + vp8_block2above[b+2]) = *(A + vp8_block2above[b+3]) = + *(A + vp8_block2above[b]); + *(L + vp8_block2left[b+1]) = *(L + vp8_block2left[b+2]) = *(L + vp8_block2left[b+3]) = + *(L + vp8_block2left[b]); + } + /* + for (b = 16; b < 24; b+=4) { + stuff1st_order_buv_8x8(x->block + b, t, 2, x->frame_type, + A + vp8_block2above[b], + L + vp8_block2left[b], cpi); + *(A + vp8_block2above[b+1]) = *(A + vp8_block2above[b+2]) = *(A + vp8_block2above[b+3]) = + *(A + vp8_block2above[b]); + *(L + vp8_block2left[b+1]) = *(L + vp8_block2left[b+2]) = *(L + vp8_block2left[b+3]) = + *(L + vp8_block2left[b]); + } + */ +} +#endif static __inline void stuff2nd_order_b ( @@ -458,6 +933,7 @@ static __inline void stuff1st_order_b *a = *l = pt; } + static __inline void stuff1st_order_buv ( diff --git a/vp8/encoder/tokenize.h b/vp8/encoder/tokenize.h index 04a8879cf..d7ef529b8 100644 --- a/vp8/encoder/tokenize.h +++ b/vp8/encoder/tokenize.h @@ -38,6 +38,7 @@ void init_context_counters(); void print_context_counters(); extern _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; +extern _int64 context_counters_8x8[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; #endif extern const 
int *vp8_dct_value_cost_ptr; -- 2.40.0