From: Yaowu Xu Date: Mon, 11 Jan 2016 18:27:35 +0000 (-0800) Subject: Merge branch 'master' into nextgenv2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0367f32ea87e1b224768ca995000709fbf14e7eb;p=libvpx Merge branch 'master' into nextgenv2 Manually resolved the following conflicts: vp10/common/blockd.h vp10/common/entropy.h vp10/common/entropymode.c vp10/common/entropymode.h vp10/common/enums.h vp10/common/thread_common.c vp10/decoder/decodeframe.c vp10/decoder/decodemv.c vp10/encoder/bitstream.c vp10/encoder/encodeframe.c vp10/encoder/rd.c vp10/encoder/rdopt.c Change-Id: I15d20ce5292b70f0c2b4ba55c1f1318181481596 --- 0367f32ea87e1b224768ca995000709fbf14e7eb diff --cc vp10/common/blockd.h index 66e29d41b,fce176796..dd5c2d17b --- a/vp10/common/blockd.h +++ b/vp10/common/blockd.h @@@ -128,17 -82,10 +128,15 @@@ typedef struct // Only for INTER blocks INTERP_FILTER interp_filter; MV_REFERENCE_FRAME ref_frame[2]; - #if CONFIG_EXT_TX TX_TYPE tx_type; - #endif // CONFIG_EXT_TX - // TODO(slavarnway): Delete and use bmi[3].as_mv[] instead. +#if CONFIG_EXT_INTRA + EXT_INTRA_MODE_INFO ext_intra_mode_info; + int8_t angle_delta[2]; +#endif // CONFIG_EXT_INTRA + int_mv mv[2]; + int_mv pred_mv[2]; } MB_MODE_INFO; typedef struct MODE_INFO { @@@ -390,79 -226,12 +388,80 @@@ static INLINE TX_TYPE get_tx_type(PLANE const MODE_INFO *const mi = xd->mi[0]; const MB_MODE_INFO *const mbmi = &mi->mbmi; +#if CONFIG_EXT_INTRA + if (!is_inter_block(mbmi)) { + const int use_ext_intra_mode_info = + mbmi->ext_intra_mode_info.use_ext_intra_mode[plane_type]; + const EXT_INTRA_MODE ext_intra_mode = + mbmi->ext_intra_mode_info.ext_intra_mode[plane_type]; + const PREDICTION_MODE mode = (plane_type == PLANE_TYPE_Y) ? 
+ get_y_mode(mi, block_idx) : mbmi->uv_mode; + + if (xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32) + return DCT_DCT; + +#if CONFIG_EXT_TX + if (mbmi->sb_type >= BLOCK_8X8 && plane_type == PLANE_TYPE_Y && + ALLOW_INTRA_EXT_TX) + return mbmi->tx_type; +#endif // CONFIG_EXT_TX + + if (use_ext_intra_mode_info) + return filter_intra_mode_to_tx_type_lookup[ext_intra_mode]; + + if (mode == DC_PRED) { + return DCT_DCT; + } else if (mode == TM_PRED) { + return ADST_ADST; + } else { + int angle = mode_to_angle_map[mode]; + if (mbmi->sb_type >= BLOCK_8X8) + angle += mbmi->angle_delta[plane_type] * ANGLE_STEP; + assert(angle > 0 && angle < 270); + if (angle == 135) + return ADST_ADST; + else if (angle < 45 || angle > 225) + return DCT_DCT; + else if (angle < 135) + return ADST_DCT; + else + return DCT_ADST; + } + } +#endif // CONFIG_EXT_INTRA + +#if CONFIG_EXT_TX +#if USE_IDTX_FOR_32X32 + if (xd->lossless[mbmi->segment_id] || tx_size > TX_32X32 || + (tx_size >= TX_32X32 && !is_inter_block(mbmi))) +#else + if (xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32) +#endif + return DCT_DCT; + if (mbmi->sb_type >= BLOCK_8X8) { + if (plane_type == PLANE_TYPE_Y) { + if (is_inter_block(mbmi) || ALLOW_INTRA_EXT_TX) + return mbmi->tx_type; + } + if (is_inter_block(mbmi)) + // UV Inter only + return (mbmi->tx_type == IDTX && tx_size == TX_32X32 ? + DCT_DCT : mbmi->tx_type); + } + + // Sub8x8-Inter/Intra OR UV-Intra + if (is_inter_block(mbmi)) // Sub8x8-Inter + return DCT_DCT; + else // Sub8x8 Intra OR UV-Intra - return intra_mode_to_tx_type_lookup[plane_type == PLANE_TYPE_Y ? ++ return intra_mode_to_tx_type_context[plane_type == PLANE_TYPE_Y ? 
+ get_y_mode(mi, block_idx) : mbmi->uv_mode]; +#else + (void) block_idx; if (plane_type != PLANE_TYPE_Y || xd->lossless[mbmi->segment_id] || - is_inter_block(mbmi) || tx_size >= TX_32X32) - mbmi->tx_size >= TX_32X32) ++ tx_size >= TX_32X32) return DCT_DCT; - return intra_mode_to_tx_type_lookup[get_y_mode(mi, block_idx)]; - + return mbmi->tx_type; +#endif // CONFIG_EXT_TX } void vp10_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y); diff --cc vp10/common/entropy.h index c1de3b265,9a471c818..747d1ad97 --- a/vp10/common/entropy.h +++ b/vp10/common/entropy.h @@@ -21,8 -21,8 +21,8 @@@ extern "C" { #endif - #define DIFF_UPDATE_PROB 252 -#define DIFF_UPDATE_PROB 252 -#define GROUP_DIFF_UPDATE_PROB 252 ++#define DIFF_UPDATE_PROB 252 +#define GROUP_DIFF_UPDATE_PROB 252 // Coefficient token alphabet #define ZERO_TOKEN 0 // 0 Extra Bits 0+0 diff --cc vp10/common/entropymode.c index 0048c7ca4,78f3650f8..1b4fd267d --- a/vp10/common/entropymode.c +++ b/vp10/common/entropymode.c @@@ -1187,11 -324,28 +1187,33 @@@ static const struct segmentation_probs { 128, 128, 128, 128, 128, 128, 128 }, { 128, 128, 128 }, }; -#endif +#if CONFIG_EXT_INTRA +static const vpx_prob default_ext_intra_probs[2] = {230, 230}; +#endif // CONFIG_EXT_INTRA + ++#if !CONFIG_EXT_TX + const vpx_tree_index vp10_ext_tx_tree[TREE_SIZE(TX_TYPES)] = { + -DCT_DCT, 2, + -ADST_ADST, 4, + -ADST_DCT, -DCT_ADST + }; + + static const vpx_prob default_intra_ext_tx_prob[EXT_TX_SIZES] + [TX_TYPES][TX_TYPES - 1] = { + {{240, 85, 128}, {4, 1, 248}, {4, 1, 8}, {4, 248, 128}}, + {{244, 85, 128}, {8, 2, 248}, {8, 2, 8}, {8, 248, 128}}, + {{248, 85, 128}, {16, 4, 248}, {16, 4, 8}, {16, 248, 128}}, + }; + + static const vpx_prob default_inter_ext_tx_prob[EXT_TX_SIZES] + [TX_TYPES - 1] = { + {160, 85, 128}, + {176, 85, 128}, + {192, 85, 128}, + }; ++#endif + static void init_mode_probs(FRAME_CONTEXT *fc) { vp10_copy(fc->uv_mode_prob, default_uv_probs); vp10_copy(fc->y_mode_prob, default_if_y_probs); @@@ -1202,40 -356,18 
+1224,38 @@@ vp10_copy(fc->comp_ref_prob, default_comp_ref_p); vp10_copy(fc->single_ref_prob, default_single_ref_p); fc->tx_probs = default_tx_probs; +#if CONFIG_VAR_TX + vp10_copy(fc->txfm_partition_prob, default_txfm_partition_probs); +#endif vp10_copy(fc->skip_probs, default_skip_probs); +#if CONFIG_REF_MV + vp10_copy(fc->newmv_prob, default_newmv_prob); + vp10_copy(fc->zeromv_prob, default_zeromv_prob); + vp10_copy(fc->refmv_prob, default_refmv_prob); +#endif vp10_copy(fc->inter_mode_probs, default_inter_mode_probs); - #if CONFIG_EXT_TX - vp10_copy(fc->inter_ext_tx_prob, default_inter_ext_tx_prob); - vp10_copy(fc->intra_ext_tx_prob, default_intra_ext_tx_prob); - #endif // CONFIG_EXT_TX -#if CONFIG_MISC_FIXES +#if CONFIG_SUPERTX + vp10_copy(fc->supertx_prob, default_supertx_prob); +#endif // CONFIG_SUPERTX vp10_copy(fc->seg.tree_probs, default_seg_probs.tree_probs); vp10_copy(fc->seg.pred_probs, default_seg_probs.pred_probs); -#endif - vp10_copy(fc->intra_ext_tx_prob, default_intra_ext_tx_prob); +#if CONFIG_EXT_INTRA + vp10_copy(fc->ext_intra_probs, default_ext_intra_probs); +#endif // CONFIG_EXT_INTRA + vp10_copy(fc->inter_ext_tx_prob, default_inter_ext_tx_prob); ++ vp10_copy(fc->intra_ext_tx_prob, default_intra_ext_tx_prob); } +#if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4 +const vpx_tree_index vp10_switchable_interp_tree +[TREE_SIZE(SWITCHABLE_FILTERS)] = { + -EIGHTTAP, 2, + 4, -EIGHTTAP_SHARP, + -EIGHTTAP_SMOOTH, -EIGHTTAP_SMOOTH2, +}; +#else const vpx_tree_index vp10_switchable_interp_tree - [TREE_SIZE(SWITCHABLE_FILTERS)] = { +[TREE_SIZE(SWITCHABLE_FILTERS)] = { -EIGHTTAP, 2, -EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP }; @@@ -1333,40 -453,22 +1353,55 @@@ void vp10_adapt_intra_frame_probs(VP10_ fc->skip_probs[i] = mode_mv_merge_probs( pre_fc->skip_probs[i], counts->skip[i]); +#if CONFIG_EXT_TX + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + int s; + for (s = 1; s < EXT_TX_SETS_INTER; ++s) { + if (use_inter_ext_tx_for_txsize[s][i]) { + 
vpx_tree_merge_probs(vp10_ext_tx_inter_tree[s], + pre_fc->inter_ext_tx_prob[s][i], + counts->inter_ext_tx[s][i], + fc->inter_ext_tx_prob[s][i]); + } + } + for (s = 1; s < EXT_TX_SETS_INTRA; ++s) { + if (use_intra_ext_tx_for_txsize[s][i]) { + int j; + for (j = 0; j < INTRA_MODES; ++j) + vpx_tree_merge_probs(vp10_ext_tx_intra_tree[s], + pre_fc->intra_ext_tx_prob[s][i][j], + counts->intra_ext_tx[s][i][j], + fc->intra_ext_tx_prob[s][i][j]); + } + } + } ++#else + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + int j; + for (j = 0; j < TX_TYPES; ++j) + vpx_tree_merge_probs(vp10_ext_tx_tree, + pre_fc->intra_ext_tx_prob[i][j], + counts->intra_ext_tx[i][j], + fc->intra_ext_tx_prob[i][j]); + } + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + vpx_tree_merge_probs(vp10_ext_tx_tree, + pre_fc->inter_ext_tx_prob[i], + counts->inter_ext_tx[i], + fc->inter_ext_tx_prob[i]); + } +#endif // CONFIG_EXT_TX + +#if CONFIG_SUPERTX + for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) { + int j; + for (j = 1; j < TX_SIZES; ++j) { + fc->supertx_prob[i][j] = mode_mv_merge_probs(pre_fc->supertx_prob[i][j], + counts->supertx[i][j]); + } + } +#endif // CONFIG_SUPERTX -#if CONFIG_MISC_FIXES if (cm->seg.temporal_update) { for (i = 0; i < PREDICTION_PROBS; i++) fc->seg.pred_probs[i] = mode_mv_merge_probs(pre_fc->seg.pred_probs[i], diff --cc vp10/common/entropymode.h index 11ba12f31,611d3ad13..a1ad2c4ee --- a/vp10/common/entropymode.h +++ b/vp10/common/entropymode.h @@@ -70,27 -58,17 +70,30 @@@ typedef struct frame_contexts vpx_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1]; vpx_prob intra_inter_prob[INTRA_INTER_CONTEXTS]; vpx_prob comp_inter_prob[COMP_INTER_CONTEXTS]; - vpx_prob single_ref_prob[REF_CONTEXTS][2]; - vpx_prob comp_ref_prob[REF_CONTEXTS]; + vpx_prob single_ref_prob[REF_CONTEXTS][SINGLE_REFS-1]; + vpx_prob comp_ref_prob[REF_CONTEXTS][COMP_REFS-1]; struct tx_probs tx_probs; +#if CONFIG_VAR_TX + vpx_prob txfm_partition_prob[TXFM_PARTITION_CONTEXTS]; +#endif vpx_prob 
skip_probs[SKIP_CONTEXTS]; nmv_context nmvc; -#if CONFIG_MISC_FIXES - struct segmentation_probs seg; -#endif + int initialized; +#if CONFIG_EXT_TX + vpx_prob inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1]; + vpx_prob intra_ext_tx_prob[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES] + [TX_TYPES - 1]; ++#else + vpx_prob intra_ext_tx_prob[EXT_TX_SIZES][TX_TYPES][TX_TYPES - 1]; + vpx_prob inter_ext_tx_prob[EXT_TX_SIZES][TX_TYPES - 1]; - int initialized; +#endif // CONFIG_EXT_TX +#if CONFIG_SUPERTX + vpx_prob supertx_prob[PARTITION_SUPERTX_CONTEXTS][TX_SIZES]; +#endif // CONFIG_SUPERTX + struct segmentation_probs seg; +#if CONFIG_EXT_INTRA + vpx_prob ext_intra_probs[PLANE_TYPES]; +#endif // CONFIG_EXT_INTRA } FRAME_CONTEXT; typedef struct FRAME_COUNTS { @@@ -112,27 -84,16 +115,30 @@@ unsigned int inter_mode[INTER_MODE_CONTEXTS][INTER_MODES]; unsigned int intra_inter[INTRA_INTER_CONTEXTS][2]; unsigned int comp_inter[COMP_INTER_CONTEXTS][2]; - unsigned int single_ref[REF_CONTEXTS][2][2]; - unsigned int comp_ref[REF_CONTEXTS][2]; + unsigned int single_ref[REF_CONTEXTS][SINGLE_REFS-1][2]; + unsigned int comp_ref[REF_CONTEXTS][COMP_REFS-1][2]; struct tx_counts tx; +#if CONFIG_VAR_TX + unsigned int txfm_partition[TXFM_PARTITION_CONTEXTS][2]; +#endif unsigned int skip[SKIP_CONTEXTS][2]; nmv_context_counts mv; -#if CONFIG_MISC_FIXES - struct seg_counts seg; -#endif +#if CONFIG_EXT_TX + unsigned int inter_ext_tx[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES]; + unsigned int intra_ext_tx[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES] + [TX_TYPES]; ++#else + unsigned int intra_ext_tx[EXT_TX_SIZES][TX_TYPES][TX_TYPES]; + unsigned int inter_ext_tx[EXT_TX_SIZES][TX_TYPES]; +#endif // CONFIG_EXT_TX +#if CONFIG_SUPERTX + unsigned int supertx[PARTITION_SUPERTX_CONTEXTS][TX_SIZES][2]; + unsigned int supertx_size[TX_SIZES]; +#endif // CONFIG_SUPERTX + struct seg_counts seg; +#if CONFIG_EXT_INTRA + unsigned int ext_intra[PLANE_TYPES][2]; +#endif // CONFIG_EXT_INTRA } 
FRAME_COUNTS; extern const vpx_prob vp10_kf_y_mode_prob[INTRA_MODES][INTRA_MODES] @@@ -170,12 -123,8 +176,15 @@@ void vp10_tx_counts_to_branch_counts_16 void vp10_tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, unsigned int (*ct_8x8p)[2]); +#if CONFIG_EXT_TX +extern const vpx_tree_index + vp10_ext_tx_inter_tree[EXT_TX_SETS_INTER][TREE_SIZE(TX_TYPES)]; +extern const vpx_tree_index + vp10_ext_tx_intra_tree[EXT_TX_SETS_INTRA][TREE_SIZE(TX_TYPES)]; ++#else + extern const vpx_tree_index + vp10_ext_tx_tree[TREE_SIZE(TX_TYPES)]; +#endif // CONFIG_EXT_TX static INLINE int vp10_ceil_log2(int n) { int i = 1, p = 2; diff --cc vp10/common/enums.h index 44de0a227,18c7d1629..f0d1ba258 --- a/vp10/common/enums.h +++ b/vp10/common/enums.h @@@ -94,43 -94,15 +94,44 @@@ typedef enum ADST_DCT = 1, // ADST in vertical, DCT in horizontal DCT_ADST = 2, // DCT in vertical, ADST in horizontal ADST_ADST = 3, // ADST in both directions - TX_TYPES = 4 +#if CONFIG_EXT_TX + FLIPADST_DCT = 4, + DCT_FLIPADST = 5, + FLIPADST_FLIPADST = 6, + ADST_FLIPADST = 7, + FLIPADST_ADST = 8, + DST_DCT = 9, + DCT_DST = 10, + DST_ADST = 11, + ADST_DST = 12, + DST_FLIPADST = 13, + FLIPADST_DST = 14, + DST_DST = 15, + IDTX = 16, +#endif // CONFIG_EXT_TX + TX_TYPES, } TX_TYPE; + #define EXT_TX_SIZES 3 // number of sizes that use extended transforms + +#if CONFIG_EXT_TX +#define USE_DST2 1 - #define EXT_TX_SIZES 3 // number of sizes that use extended transforms +#define EXT_TX_SETS_INTER 4 // Sets of transform selections for INTER +#define EXT_TX_SETS_INTRA 3 // Sets of transform selections for INTRA +#endif // CONFIG_EXT_TX + typedef enum { VP9_LAST_FLAG = 1 << 0, +#if CONFIG_EXT_REFS + VP9_LAST2_FLAG = 1 << 1, + VP9_LAST3_FLAG = 1 << 2, + VP9_LAST4_FLAG = 1 << 3, + VP9_GOLD_FLAG = 1 << 4, + VP9_ALT_FLAG = 1 << 5, +#else VP9_GOLD_FLAG = 1 << 1, VP9_ALT_FLAG = 1 << 2, +#endif // CONFIG_EXT_REFS } VP9_REFFRAME; typedef enum { diff --cc vp10/common/thread_common.c index 3098b361a,0c7a1c22a..a1f17e935 
--- a/vp10/common/thread_common.c +++ b/vp10/common/thread_common.c @@@ -456,36 -435,18 +456,47 @@@ void vp10_accumulate_frame_counts(VP10_ comps->fp[i] += comps_t->fp[i]; } +#if CONFIG_EXT_TX + for (i = 0; i < EXT_TX_SIZES; i++) { + int s, k; + for (s = 1; s < EXT_TX_SETS_INTER; ++s) { + if (use_inter_ext_tx_for_txsize[s][i]) { + for (k = 0; k < TX_TYPES; k++) + cm->counts.inter_ext_tx[s][i][k] += counts->inter_ext_tx[s][i][k]; + } + } + for (s = 1; s < EXT_TX_SETS_INTRA; ++s) { + if (use_intra_ext_tx_for_txsize[s][i]) { + int j; + for (j = 0; j < INTRA_MODES; ++j) + for (k = 0; k < TX_TYPES; k++) + cm->counts.intra_ext_tx[s][i][j][k] += + counts->intra_ext_tx[s][i][j][k]; + } + } + } ++#else + for (i = 0; i < EXT_TX_SIZES; i++) { + int j; + for (j = 0; j < TX_TYPES; ++j) + for (k = 0; k < TX_TYPES; k++) + cm->counts.intra_ext_tx[i][j][k] += counts->intra_ext_tx[i][j][k]; + } + for (i = 0; i < EXT_TX_SIZES; i++) { + for (k = 0; k < TX_TYPES; k++) + cm->counts.inter_ext_tx[i][k] += counts->inter_ext_tx[i][k]; + } +#endif // CONFIG_EXT_TX + +#if CONFIG_SUPERTX + for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; i++) + for (j = 0; j < TX_SIZES; j++) + for (k = 0; k < 2; k++) + cm->counts.supertx[i][j][k] += counts->supertx[i][j][k]; + for (i = 0; i < TX_SIZES; i++) + cm->counts.supertx_size[i] += counts->supertx_size[i]; +#endif // CONFIG_SUPERTX -#if CONFIG_MISC_FIXES for (i = 0; i < PREDICTION_PROBS; i++) for (j = 0; j < 2; j++) cm->counts.seg.pred[i][j] += counts->seg.pred[i][j]; diff --cc vp10/decoder/decodeframe.c index 44b5bc406,1c3f18239..1bb569dc7 --- a/vp10/decoder/decodeframe.c +++ b/vp10/decoder/decodeframe.c @@@ -2991,46 -2124,23 +2991,64 @@@ static size_t read_uncompressed_header( return sz; } +#if CONFIG_EXT_TX +static void read_ext_tx_probs(FRAME_CONTEXT *fc, vpx_reader *r) { + int i, j, k; + int s; + for (s = 1; s < EXT_TX_SETS_INTER; ++s) { + if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) { + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + if 
(!use_inter_ext_tx_for_txsize[s][i]) continue; + for (j = 0; j < num_ext_tx_set_inter[s] - 1; ++j) + vp10_diff_update_prob(r, &fc->inter_ext_tx_prob[s][i][j]); + } + } + } + + for (s = 1; s < EXT_TX_SETS_INTRA; ++s) { + if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) { + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + if (!use_intra_ext_tx_for_txsize[s][i]) continue; + for (j = 0; j < INTRA_MODES; ++j) + for (k = 0; k < num_ext_tx_set_intra[s] - 1; ++k) + vp10_diff_update_prob(r, &fc->intra_ext_tx_prob[s][i][j][k]); + } + } + } +} ++#else + static void read_ext_tx_probs(FRAME_CONTEXT *fc, vpx_reader *r) { + int i, j, k; + if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) { + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + for (j = 0; j < TX_TYPES; ++j) + for (k = 0; k < TX_TYPES - 1; ++k) + vp10_diff_update_prob(r, &fc->intra_ext_tx_prob[i][j][k]); + } + } + if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) { + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + for (k = 0; k < TX_TYPES - 1; ++k) + vp10_diff_update_prob(r, &fc->inter_ext_tx_prob[i][k]); + } + } + } + +#endif // CONFIG_EXT_TX + +#if CONFIG_SUPERTX +static void read_supertx_probs(FRAME_CONTEXT *fc, vpx_reader *r) { + int i, j; + if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) { + for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) { + for (j = 1; j < TX_SIZES; ++j) { + vp10_diff_update_prob(r, &fc->supertx_prob[i][j]); + } + } + } +} +#endif // CONFIG_SUPERTX + static int read_compressed_header(VP10Decoder *pbi, const uint8_t *data, size_t partition_size) { VP10_COMMON *const cm = &pbi->common; @@@ -3100,14 -2215,14 +3118,12 @@@ for (i = 0; i < INTRA_MODES - 1; ++i) vp10_diff_update_prob(&r, &fc->y_mode_prob[j][i]); -#if !CONFIG_MISC_FIXES - for (j = 0; j < PARTITION_CONTEXTS; ++j) - for (i = 0; i < PARTITION_TYPES - 1; ++i) - vp10_diff_update_prob(&r, &fc->partition_prob[j][i]); -#endif - read_mv_probs(nmvc, cm->allow_high_precision_mv, &r); - #if CONFIG_EXT_TX read_ext_tx_probs(fc, &r); - #endif +#if CONFIG_SUPERTX + if (!xd->lossless[0]) + 
read_supertx_probs(fc, &r); +#endif } return vpx_reader_has_error(&r); @@@ -3148,13 -2263,10 +3164,14 @@@ static void debug_check_frame_counts(co assert(!memcmp(&cm->counts.tx, &zero_counts.tx, sizeof(cm->counts.tx))); assert(!memcmp(cm->counts.skip, zero_counts.skip, sizeof(cm->counts.skip))); assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv))); - - assert(!memcmp(cm->counts.intra_ext_tx, zero_counts.intra_ext_tx, - sizeof(cm->counts.intra_ext_tx))); +#if CONFIG_EXT_TX assert(!memcmp(cm->counts.inter_ext_tx, zero_counts.inter_ext_tx, sizeof(cm->counts.inter_ext_tx))); + assert(!memcmp(cm->counts.intra_ext_tx, zero_counts.intra_ext_tx, + sizeof(cm->counts.intra_ext_tx))); ++#else ++ assert(!memcmp(cm->counts.intra_ext_tx, zero_counts.intra_ext_tx, +#endif // CONFIG_EXT_TX } #endif // NDEBUG diff --cc vp10/decoder/decodemv.c index f4386e488,a28ae5592..ec9345382 --- a/vp10/decoder/decodemv.c +++ b/vp10/decoder/decodemv.c @@@ -461,45 -296,20 +461,59 @@@ static void read_intra_frame_mode_info( } mbmi->uv_mode = read_intra_mode_uv(cm, xd, r, mbmi->mode); +#if CONFIG_EXT_INTRA + if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED && + bsize >= BLOCK_8X8) + mbmi->angle_delta[1] = + read_uniform(r, 2 * MAX_ANGLE_DELTAS + 1) - MAX_ANGLE_DELTAS; +#endif + mbmi->palette_mode_info.palette_size[0] = 0; + mbmi->palette_mode_info.palette_size[1] = 0; + if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools && + mbmi->mode == DC_PRED) + read_palette_mode_info(cm, xd, r); + +#if CONFIG_EXT_TX + if (get_ext_tx_types(mbmi->tx_size, mbmi->sb_type, 0) > 1 && + cm->base_qindex > 0 && !mbmi->skip && + !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) && + ALLOW_INTRA_EXT_TX) { + FRAME_COUNTS *counts = xd->counts; + int eset = get_ext_tx_set(mbmi->tx_size, mbmi->sb_type, 0); + if (eset > 0) { + mbmi->tx_type = vpx_read_tree( + r, vp10_ext_tx_intra_tree[eset], + cm->fc->intra_ext_tx_prob[eset][mbmi->tx_size][mbmi->mode]); + if (counts) + 
++counts->intra_ext_tx[eset][mbmi->tx_size][mbmi->mode] + [mbmi->tx_type]; + } + } else { + mbmi->tx_type = DCT_DCT; + } ++#else + if (mbmi->tx_size < TX_32X32 && + cm->base_qindex > 0 && !mbmi->skip && + !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { + FRAME_COUNTS *counts = xd->counts; + TX_TYPE tx_type_nom = intra_mode_to_tx_type_context[mbmi->mode]; + mbmi->tx_type = vpx_read_tree( + r, vp10_ext_tx_tree, + cm->fc->intra_ext_tx_prob[mbmi->tx_size][tx_type_nom]); + if (counts) + ++counts->intra_ext_tx[mbmi->tx_size][tx_type_nom][mbmi->tx_type]; + } else { + mbmi->tx_type = DCT_DCT; + } +#endif // CONFIG_EXT_TX + +#if CONFIG_EXT_INTRA + mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0; + mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0; + if (bsize >= BLOCK_8X8) + read_ext_intra_mode_info(cm, xd, r); +#endif // CONFIG_EXT_INTRA } static int read_mv_component(vpx_reader *r, @@@ -1015,39 -667,27 +1029,61 @@@ static void read_inter_frame_mode_info( else read_intra_block_mode_info(cm, xd, mi, r); +#if CONFIG_EXT_TX + if (get_ext_tx_types(mbmi->tx_size, mbmi->sb_type, inter_block) > 1 && + cm->base_qindex > 0 && !mbmi->skip && +#if CONFIG_SUPERTX + !supertx_enabled && +#endif // CONFIG_SUPERTX + !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { + int eset = get_ext_tx_set(mbmi->tx_size, mbmi->sb_type, + inter_block); + FRAME_COUNTS *counts = xd->counts; + + if (inter_block) { + if (eset > 0) { + mbmi->tx_type = + vpx_read_tree(r, vp10_ext_tx_inter_tree[eset], + cm->fc->inter_ext_tx_prob[eset][mbmi->tx_size]); + if (counts) + ++counts->inter_ext_tx[eset][mbmi->tx_size][mbmi->tx_type]; + } + } else if (ALLOW_INTRA_EXT_TX) { + if (eset > 0) { + mbmi->tx_type = vpx_read_tree(r, vp10_ext_tx_intra_tree[eset], + cm->fc->intra_ext_tx_prob[eset] + [mbmi->tx_size][mbmi->mode]); + if (counts) + ++counts->intra_ext_tx[eset][mbmi->tx_size] + [mbmi->mode][mbmi->tx_type]; + } + } + } else { + mbmi->tx_type = DCT_DCT; + } ++#else + if 
(mbmi->tx_size < TX_32X32 && + cm->base_qindex > 0 && !mbmi->skip && + !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { + FRAME_COUNTS *counts = xd->counts; + if (inter_block) { + mbmi->tx_type = vpx_read_tree( + r, vp10_ext_tx_tree, + cm->fc->inter_ext_tx_prob[mbmi->tx_size]); + if (counts) + ++counts->inter_ext_tx[mbmi->tx_size][mbmi->tx_type]; + } else { + const TX_TYPE tx_type_nom = intra_mode_to_tx_type_context[mbmi->mode]; + mbmi->tx_type = vpx_read_tree( + r, vp10_ext_tx_tree, + cm->fc->intra_ext_tx_prob[mbmi->tx_size][tx_type_nom]); + if (counts) + ++counts->intra_ext_tx[mbmi->tx_size][tx_type_nom][mbmi->tx_type]; + } + } else { + mbmi->tx_type = DCT_DCT; + } +#endif // CONFIG_EXT_TX } void vp10_read_mode_info(VP10Decoder *const pbi, MACROBLOCKD *xd, diff --cc vp10/encoder/bitstream.c index 4c0c6af22,ede8bb370..344f63de7 --- a/vp10/encoder/bitstream.c +++ b/vp10/encoder/bitstream.c @@@ -79,32 -58,11 +79,36 @@@ static INLINE void write_uniform(vpx_wr } } +#if CONFIG_EXT_TX +static struct vp10_token ext_tx_inter_encodings[EXT_TX_SETS_INTER][TX_TYPES]; +static struct vp10_token ext_tx_intra_encodings[EXT_TX_SETS_INTRA][TX_TYPES]; ++#else + static struct vp10_token ext_tx_encodings[TX_TYPES]; +#endif // CONFIG_EXT_TX void vp10_encode_token_init() { +#if CONFIG_EXT_TX + int s; + for (s = 1; s < EXT_TX_SETS_INTER; ++s) { + vp10_tokens_from_tree(ext_tx_inter_encodings[s], vp10_ext_tx_inter_tree[s]); + } + for (s = 1; s < EXT_TX_SETS_INTRA; ++s) { + vp10_tokens_from_tree(ext_tx_intra_encodings[s], vp10_ext_tx_intra_tree[s]); + } ++#else + vp10_tokens_from_tree(ext_tx_encodings, vp10_ext_tx_tree); +#endif // CONFIG_EXT_TX +} + +#if CONFIG_SUPERTX +static int vp10_check_supertx(VP10_COMMON *cm, int mi_row, int mi_col, + BLOCK_SIZE bsize) { + MODE_INFO *mi; + mi = cm->mi + (mi_row * cm->mi_stride + mi_col); + return mi[0].mbmi.tx_size == max_txsize_lookup[bsize] && + mi[0].mbmi.sb_type < bsize; } +#endif // CONFIG_SUPERTX static void 
write_intra_mode(vpx_writer *w, PREDICTION_MODE mode, const vpx_prob *probs) { @@@ -306,101 -157,48 +310,145 @@@ static void update_switchable_interp_pr counts->switchable_interp[j], SWITCHABLE_FILTERS, w); } ++ +#if CONFIG_EXT_TX +static void update_ext_tx_probs(VP10_COMMON *cm, vpx_writer *w) { + const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) - + vp10_cost_zero(GROUP_DIFF_UPDATE_PROB); + int i, j; + int s; + for (s = 1; s < EXT_TX_SETS_INTER; ++s) { + int savings = 0; + int do_update = 0; + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + if (!use_inter_ext_tx_for_txsize[s][i]) continue; + savings += prob_diff_update_savings( + vp10_ext_tx_inter_tree[s], cm->fc->inter_ext_tx_prob[s][i], + cm->counts.inter_ext_tx[s][i], num_ext_tx_set_inter[s]); + } + do_update = savings > savings_thresh; + vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB); + if (do_update) { + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + if (!use_inter_ext_tx_for_txsize[s][i]) continue; + prob_diff_update(vp10_ext_tx_inter_tree[s], + cm->fc->inter_ext_tx_prob[s][i], + cm->counts.inter_ext_tx[s][i], + num_ext_tx_set_inter[s], w); + } + } + } + + for (s = 1; s < EXT_TX_SETS_INTRA; ++s) { + int savings = 0; + int do_update = 0; + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + if (!use_intra_ext_tx_for_txsize[s][i]) continue; + for (j = 0; j < INTRA_MODES; ++j) + savings += prob_diff_update_savings( + vp10_ext_tx_intra_tree[s], cm->fc->intra_ext_tx_prob[s][i][j], + cm->counts.intra_ext_tx[s][i][j], num_ext_tx_set_intra[s]); + } + do_update = savings > savings_thresh; + vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB); + if (do_update) { + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + if (!use_intra_ext_tx_for_txsize[s][i]) continue; + for (j = 0; j < INTRA_MODES; ++j) + prob_diff_update(vp10_ext_tx_intra_tree[s], + cm->fc->intra_ext_tx_prob[s][i][j], + cm->counts.intra_ext_tx[s][i][j], + num_ext_tx_set_intra[s], w); + } + } + } +} ++#else + static void update_ext_tx_probs(VP10_COMMON *cm, 
vpx_writer *w) { + const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) - + vp10_cost_zero(GROUP_DIFF_UPDATE_PROB); + int i, j; + + int savings = 0; + int do_update = 0; + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + for (j = 0; j < TX_TYPES; ++j) + savings += prob_diff_update_savings( + vp10_ext_tx_tree, cm->fc->intra_ext_tx_prob[i][j], + cm->counts.intra_ext_tx[i][j], TX_TYPES); + } + do_update = savings > savings_thresh; + vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB); + if (do_update) { + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + for (j = 0; j < TX_TYPES; ++j) + prob_diff_update(vp10_ext_tx_tree, + cm->fc->intra_ext_tx_prob[i][j], + cm->counts.intra_ext_tx[i][j], + TX_TYPES, w); + } + } + savings = 0; + do_update = 0; + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + savings += prob_diff_update_savings( + vp10_ext_tx_tree, cm->fc->inter_ext_tx_prob[i], + cm->counts.inter_ext_tx[i], TX_TYPES); + } + do_update = savings > savings_thresh; + vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB); + if (do_update) { + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + prob_diff_update(vp10_ext_tx_tree, + cm->fc->inter_ext_tx_prob[i], + cm->counts.inter_ext_tx[i], + TX_TYPES, w); + } + } + } +#endif // CONFIG_EXT_TX + +static void pack_palette_tokens(vpx_writer *w, TOKENEXTRA **tp, + BLOCK_SIZE bsize, int n) { + int rows = 4 * num_4x4_blocks_high_lookup[bsize]; + int cols = 4 * num_4x4_blocks_wide_lookup[bsize]; + int i; + TOKENEXTRA *p = *tp; + + for (i = 0; i < rows * cols -1; ++i) { + vp10_write_token(w, vp10_palette_color_tree[n - 2], p->context_tree, + &palette_color_encodings[n - 2][p->token]); + ++p; + } + + *tp = p; +} + +#if CONFIG_SUPERTX +static void update_supertx_probs(VP10_COMMON *cm, vpx_writer *w) { + const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) - + vp10_cost_zero(GROUP_DIFF_UPDATE_PROB); + int i, j; + int savings = 0; + int do_update = 0; + for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) { + for (j = 1; j < TX_SIZES; ++j) { + 
savings += vp10_cond_prob_diff_update_savings(&cm->fc->supertx_prob[i][j], + cm->counts.supertx[i][j]); + } + } + do_update = savings > savings_thresh; + vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB); + if (do_update) { + for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) { + for (j = 1; j < TX_SIZES; ++j) { + vp10_cond_prob_diff_update(w, &cm->fc->supertx_prob[i][j], + cm->counts.supertx[i][j]); + } + } + } +} +#endif // CONFIG_SUPERTX static void pack_mb_tokens(vpx_writer *w, TOKENEXTRA **tp, const TOKENEXTRA *const stop, @@@ -829,63 -436,26 +877,86 @@@ static void pack_inter_mode_mvs(VP10_CO allow_hp); } } +#if CONFIG_EXT_INTERP + write_switchable_interp_filter(cpi, xd, w); +#endif // CONFIG_EXT_INTERP } + +#if CONFIG_EXT_TX + if (get_ext_tx_types(mbmi->tx_size, bsize, is_inter) > 1 && + cm->base_qindex > 0 && !mbmi->skip && +#if CONFIG_SUPERTX + !supertx_enabled && +#endif // CONFIG_SUPERTX + !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { + int eset = get_ext_tx_set(mbmi->tx_size, bsize, is_inter); + if (is_inter) { + if (eset > 0) + vp10_write_token(w, vp10_ext_tx_inter_tree[eset], + cm->fc->inter_ext_tx_prob[eset][mbmi->tx_size], + &ext_tx_inter_encodings[eset][mbmi->tx_type]); + } else if (ALLOW_INTRA_EXT_TX) { + if (eset > 0) + vp10_write_token( + w, vp10_ext_tx_intra_tree[eset], + cm->fc->intra_ext_tx_prob[eset][mbmi->tx_size][mbmi->mode], + &ext_tx_intra_encodings[eset][mbmi->tx_type]); + } + } ++#else + if (mbmi->tx_size < TX_32X32 && + cm->base_qindex > 0 && !mbmi->skip && ++#if CONFIG_SUPERTX ++ !supertx_enabled && ++#endif // CONFIG_SUPERTX + !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { + if (is_inter) { + vp10_write_token( + w, vp10_ext_tx_tree, + cm->fc->inter_ext_tx_prob[mbmi->tx_size], + &ext_tx_encodings[mbmi->tx_type]); + } else { + vp10_write_token( + w, vp10_ext_tx_tree, + cm->fc->intra_ext_tx_prob[mbmi->tx_size] + [intra_mode_to_tx_type_context[mbmi->mode]], + &ext_tx_encodings[mbmi->tx_type]); + } + } else 
{ + if (!mbmi->skip) + assert(mbmi->tx_type == DCT_DCT); + } +#endif // CONFIG_EXT_TX +} + +static void write_palette_mode_info(const VP10_COMMON *cm, + const MACROBLOCKD *xd, + const MODE_INFO *const mi, + vpx_writer *w) { + const MB_MODE_INFO *const mbmi = &mi->mbmi; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const BLOCK_SIZE bsize = mbmi->sb_type; + const PALETTE_MODE_INFO *pmi = &mbmi->palette_mode_info; + int palette_ctx = 0; + int n, i; + + n = pmi->palette_size[0]; + if (above_mi) + palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0); + if (left_mi) + palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0); + vpx_write(w, n > 0, + vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx]); + if (n > 0) { + vp10_write_token(w, vp10_palette_size_tree, + vp10_default_palette_y_size_prob[bsize - BLOCK_8X8], + &palette_size_encodings[n - 2]); + for (i = 0; i < n; ++i) + vpx_write_literal(w, pmi->palette_colors[i], + cm->bit_depth); + write_uniform(w, n, pmi->palette_first_color_idx[0]); + } } static void write_mb_modes_kf(const VP10_COMMON *cm, const MACROBLOCKD *xd, @@@ -930,36 -499,16 +1001,46 @@@ } write_intra_mode(w, mbmi->uv_mode, cm->fc->uv_mode_prob[mbmi->mode]); +#if CONFIG_EXT_INTRA + if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED && + bsize >= BLOCK_8X8) + write_uniform(w, 2 * MAX_ANGLE_DELTAS + 1, + MAX_ANGLE_DELTAS + mbmi->angle_delta[1]); +#endif // CONFIG_EXT_INTRA + + if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools && + mbmi->mode == DC_PRED) + write_palette_mode_info(cm, xd, mi, w); + +#if CONFIG_EXT_TX + if (get_ext_tx_types(mbmi->tx_size, bsize, 0) > 1 && + cm->base_qindex > 0 && !mbmi->skip && + !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) && + ALLOW_INTRA_EXT_TX) { + int eset = get_ext_tx_set(mbmi->tx_size, bsize, 0); + if (eset > 0) + vp10_write_token( + w, vp10_ext_tx_intra_tree[eset], + 
cm->fc->intra_ext_tx_prob[eset][mbmi->tx_size][mbmi->mode], + &ext_tx_intra_encodings[eset][mbmi->tx_type]); + } ++#else + if (mbmi->tx_size < TX_32X32 && + cm->base_qindex > 0 && !mbmi->skip && + !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { + vp10_write_token( + w, vp10_ext_tx_tree, + cm->fc->intra_ext_tx_prob[mbmi->tx_size] + [intra_mode_to_tx_type_context[mbmi->mode]], + &ext_tx_encodings[mbmi->tx_type]); + } +#endif // CONFIG_EXT_TX + +#if CONFIG_EXT_INTRA + if (bsize >= BLOCK_8X8) + write_ext_intra_mode_info(cm, mbmi, w); +#endif // CONFIG_EXT_INTRA } static void write_modes_b(VP10_COMP *cpi, const TileInfo *const tile, diff --cc vp10/encoder/bitstream.h index 028492084,b1da89f1d..9df03da99 --- a/vp10/encoder/bitstream.h +++ b/vp10/encoder/bitstream.h @@@ -18,10 -18,9 +18,11 @@@ extern "C" #include "vp10/encoder/encoder.h" + void vp10_encode_token_init(); void vp10_pack_bitstream(VP10_COMP *const cpi, uint8_t *dest, size_t *size); +void vp10_encode_token_init(); + static INLINE int vp10_preserve_existing_gf(VP10_COMP *cpi) { return !cpi->multi_arf_allowed && cpi->refresh_golden_frame && cpi->rc.is_src_frame_alt_ref; diff --cc vp10/encoder/encodeframe.c index e516a5f9f,26ce5a1eb..c1301f9a9 --- a/vp10/encoder/encodeframe.c +++ b/vp10/encoder/encodeframe.c @@@ -4366,837 -3024,16 +4366,849 @@@ static void encode_superblock(VP10_COM } ++td->counts->tx.tx_totals[mbmi->tx_size]; ++td->counts->tx.tx_totals[get_uv_tx_size(mbmi, &xd->plane[1])]; +#if CONFIG_EXT_TX + if (get_ext_tx_types(mbmi->tx_size, bsize, is_inter_block(mbmi)) > 1 && + cm->base_qindex > 0 && !mbmi->skip && + !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { + int eset = get_ext_tx_set(mbmi->tx_size, bsize, + is_inter_block(mbmi)); + if (eset > 0) { + if (is_inter_block(mbmi)) { + ++td->counts->inter_ext_tx[eset][mbmi->tx_size][mbmi->tx_type]; + } else { + ++td->counts->intra_ext_tx[eset][mbmi->tx_size][mbmi->mode] + [mbmi->tx_type]; + } + } + } ++#else + if 
(mbmi->tx_size < TX_32X32 && + cm->base_qindex > 0 && !mbmi->skip && + !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { + if (is_inter_block(mbmi)) { + ++td->counts->inter_ext_tx[mbmi->tx_size][mbmi->tx_type]; + } else { + ++td->counts->intra_ext_tx[mbmi->tx_size] + [intra_mode_to_tx_type_context[mbmi->mode]] + [mbmi->tx_type]; + } + } +#endif // CONFIG_EXT_TX +#if CONFIG_EXT_INTRA + if (bsize >= BLOCK_8X8 && !is_inter_block(mbmi)) { + if (mbmi->mode == DC_PRED) + ++td->counts->ext_intra[0] + [mbmi->ext_intra_mode_info.use_ext_intra_mode[0]]; + if (mbmi->uv_mode == DC_PRED) + ++td->counts->ext_intra[1] + [mbmi->ext_intra_mode_info.use_ext_intra_mode[1]]; + } +#endif // CONFIG_EXT_INTRA + } + +#if CONFIG_VAR_TX + if (cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8 && + is_inter_block(mbmi) && !(mbmi->skip || seg_skip)) { + if (!output_enabled) + tx_partition_set_contexts(cm, xd, bsize, mi_row, mi_col); + } else { + TX_SIZE tx_size; + // The new intra coding scheme requires no change of transform size + if (is_inter_block(mbmi)) + tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode], + max_txsize_lookup[bsize]); + else + tx_size = (bsize >= BLOCK_8X8) ? 
mbmi->tx_size : TX_4X4; + mbmi->tx_size = tx_size; + set_txfm_ctx(xd->left_txfm_context, tx_size, xd->n8_h); + set_txfm_ctx(xd->above_txfm_context, tx_size, xd->n8_w); + } +#endif +} + +#if CONFIG_SUPERTX +static int check_intra_b(PICK_MODE_CONTEXT *ctx) { + return !is_inter_mode((&ctx->mic)->mbmi.mode); +} + +static int check_intra_sb(VP10_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize, + PC_TREE *pc_tree) { + VP10_COMMON *const cm = &cpi->common; + + const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; + PARTITION_TYPE partition; + BLOCK_SIZE subsize = bsize; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return 1; + + if (bsize >= BLOCK_8X8) + subsize = get_subsize(bsize, pc_tree->partitioning); + else + subsize = BLOCK_4X4; + + partition = partition_lookup[bsl][subsize]; + + switch (partition) { + case PARTITION_NONE: + return check_intra_b(&pc_tree->none); + break; + case PARTITION_VERT: + if (check_intra_b(&pc_tree->vertical[0])) + return 1; + if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { + if (check_intra_b(&pc_tree->vertical[1])) + return 1; + } + break; + case PARTITION_HORZ: + if (check_intra_b(&pc_tree->horizontal[0])) + return 1; + if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { + if (check_intra_b(&pc_tree->horizontal[1])) + return 1; + } + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) { + if (check_intra_b(pc_tree->leaf_split[0])) + return 1; + } else { + if (check_intra_sb(cpi, tile, mi_row, mi_col, subsize, + pc_tree->split[0])) + return 1; + if (check_intra_sb(cpi, tile, mi_row, mi_col + hbs, subsize, + pc_tree->split[1])) + return 1; + if (check_intra_sb(cpi, tile, mi_row + hbs, mi_col, subsize, + pc_tree->split[2])) + return 1; + if (check_intra_sb(cpi, tile, mi_row + hbs, mi_col + hbs, subsize, + pc_tree->split[3])) + return 1; + } + break; + default: + assert(0); } + return 0; +} + +static int check_supertx_b(TX_SIZE supertx_size, PICK_MODE_CONTEXT *ctx) { + 
return ctx->mic.mbmi.tx_size == supertx_size; +} + +static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size, + PC_TREE *pc_tree) { + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + + partition = pc_tree->partitioning; + subsize = get_subsize(bsize, partition); + switch (partition) { + case PARTITION_NONE: + return check_supertx_b(supertx_size, &pc_tree->none); + case PARTITION_VERT: + return check_supertx_b(supertx_size, &pc_tree->vertical[0]); + case PARTITION_HORZ: + return check_supertx_b(supertx_size, &pc_tree->horizontal[0]); + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) + return check_supertx_b(supertx_size, pc_tree->leaf_split[0]); + else + return check_supertx_sb(subsize, supertx_size, pc_tree->split[0]); + default: + assert(0); + return 0; + } +} + +static void predict_superblock(VP10_COMP *cpi, ThreadData *td, + int mi_row_pred, int mi_col_pred, + BLOCK_SIZE bsize_pred, int b_sub8x8, int block) { + // Used in supertx + // (mi_row_ori, mi_col_ori): location for mv + // (mi_row_pred, mi_col_pred, bsize_pred): region to predict + VP10_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO *mi_8x8 = xd->mi[0]; + MODE_INFO *mi = mi_8x8; + MB_MODE_INFO *mbmi = &mi->mbmi; + int ref; + const int is_compound = has_second_ref(mbmi); + + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); + + for (ref = 0; ref < 1 + is_compound; ++ref) { + YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, + mbmi->ref_frame[ref]); + vp10_setup_pre_planes(xd, ref, cfg, mi_row_pred, mi_col_pred, + &xd->block_refs[ref]->sf); + } + + if (!b_sub8x8) + vp10_build_inter_predictors_sb(xd, mi_row_pred, mi_col_pred, bsize_pred); + else + vp10_build_inter_predictors_sb_sub8x8(xd, mi_row_pred, mi_col_pred, + bsize_pred, block); +} + +static void predict_b_extend(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int block, + int mi_row_ori, int mi_col_ori, + int mi_row_pred, int mi_col_pred, + 
int mi_row_top, int mi_col_top, + uint8_t * dst_buf[3], int dst_stride[3], + BLOCK_SIZE bsize_ori, BLOCK_SIZE bsize_top, + BLOCK_SIZE bsize_pred, int output_enabled, + int b_sub8x8, int bextend) { + // Used in supertx + // (mi_row_ori, mi_col_ori): location for mv + // (mi_row_pred, mi_col_pred, bsize_pred): region to predict + // (mi_row_top, mi_col_top, bsize_top): region of the top partition size + // block: sub location of sub8x8 blocks + // b_sub8x8: 1: ori is sub8x8; 0: ori is not sub8x8 + // bextend: 1: region to predict is an extension of ori; 0: not + + MACROBLOCK *const x = &td->mb; + VP10_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + int r = (mi_row_pred - mi_row_top) * MI_SIZE; + int c = (mi_col_pred - mi_col_top) * MI_SIZE; + const int mi_width_top = num_8x8_blocks_wide_lookup[bsize_top]; + const int mi_height_top = num_8x8_blocks_high_lookup[bsize_top]; + + if (mi_row_pred < mi_row_top || mi_col_pred < mi_col_top || + mi_row_pred >= mi_row_top + mi_height_top || + mi_col_pred >= mi_col_top + mi_width_top || + mi_row_pred >= cm->mi_rows || mi_col_pred >= cm->mi_cols) + return; + + set_offsets_extend(cpi, td, tile, mi_row_pred, mi_col_pred, + mi_row_ori, mi_col_ori, bsize_pred, bsize_ori); + xd->plane[0].dst.stride = dst_stride[0]; + xd->plane[1].dst.stride = dst_stride[1]; + xd->plane[2].dst.stride = dst_stride[2]; + xd->plane[0].dst.buf = dst_buf[0] + + (r >> xd->plane[0].subsampling_y) * dst_stride[0] + + (c >> xd->plane[0].subsampling_x); + xd->plane[1].dst.buf = dst_buf[1] + + (r >> xd->plane[1].subsampling_y) * dst_stride[1] + + (c >> xd->plane[1].subsampling_x); + xd->plane[2].dst.buf = dst_buf[2] + + (r >> xd->plane[2].subsampling_y) * dst_stride[2] + + (c >> xd->plane[2].subsampling_x); + + predict_superblock(cpi, td, + mi_row_pred, mi_col_pred, bsize_pred, + b_sub8x8, block); + + if (output_enabled && !bextend) + update_stats(&cpi->common, td, 1); +} + +static void extend_dir(VP10_COMP *cpi, ThreadData *td, + const 
TileInfo *const tile, + int block, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, + int mi_row, int mi_col, + int mi_row_top, int mi_col_top, + int output_enabled, + uint8_t * dst_buf[3], int dst_stride[3], int dir) { + // dir: 0-lower, 1-upper, 2-left, 3-right + // 4-lowerleft, 5-upperleft, 6-lowerright, 7-upperright + MACROBLOCKD *xd = &td->mb.e_mbd; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + int xss = xd->plane[1].subsampling_x; + int yss = xd->plane[1].subsampling_y; + int b_sub8x8 = (bsize < BLOCK_8X8) ? 1 : 0; + + BLOCK_SIZE extend_bsize; + int unit, mi_row_pred, mi_col_pred; + + if (dir == 0 || dir == 1) { // lower and upper + extend_bsize = (mi_width == 1 || bsize < BLOCK_8X8 || xss < yss) ? + BLOCK_8X8 : BLOCK_16X8; + unit = num_8x8_blocks_wide_lookup[extend_bsize]; + mi_row_pred = mi_row + ((dir == 0) ? mi_height : -1); + mi_col_pred = mi_col; + + predict_b_extend(cpi, td, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, + mi_row_top, mi_col_top, dst_buf, dst_stride, + bsize, top_bsize, extend_bsize, + output_enabled, b_sub8x8, 1); + + if (mi_width > unit) { + int i; + for (i = 0; i < mi_width/unit - 1; i++) { + mi_col_pred += unit; + predict_b_extend(cpi, td, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, mi_row_top, mi_col_top, + dst_buf, dst_stride, bsize, top_bsize, extend_bsize, + output_enabled, b_sub8x8, 1); + } + } + } else if (dir == 2 || dir == 3) { // left and right + extend_bsize = (mi_height == 1 || bsize < BLOCK_8X8 || yss < xss) ? + BLOCK_8X8 : BLOCK_8X16; + unit = num_8x8_blocks_high_lookup[extend_bsize]; + mi_row_pred = mi_row; + mi_col_pred = mi_col + ((dir == 3) ? 
mi_width : -1); + + predict_b_extend(cpi, td, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, mi_row_top, mi_col_top, + dst_buf, dst_stride, bsize, top_bsize, extend_bsize, + output_enabled, b_sub8x8, 1); + + if (mi_height > unit) { + int i; + for (i = 0; i < mi_height/unit - 1; i++) { + mi_row_pred += unit; + predict_b_extend(cpi, td, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, mi_row_top, mi_col_top, + dst_buf, dst_stride, bsize, top_bsize, extend_bsize, + output_enabled, b_sub8x8, 1); + } + } + } else { + extend_bsize = BLOCK_8X8; + mi_row_pred = mi_row + ((dir == 4 || dir == 6) ? mi_height : -1); + mi_col_pred = mi_col + ((dir == 6 || dir == 7) ? mi_width : -1); + + predict_b_extend(cpi, td, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, mi_row_top, mi_col_top, + dst_buf, dst_stride, bsize, top_bsize, extend_bsize, + output_enabled, b_sub8x8, 1); + } +} + +static void extend_all(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int block, + BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, + int mi_row, int mi_col, + int mi_row_top, int mi_col_top, + int output_enabled, + uint8_t * dst_buf[3], int dst_stride[3]) { + assert(block >= 0 && block < 4); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 0); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 1); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 2); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 3); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 4); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, 
dst_buf, dst_stride, 5); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 6); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 7); +} + + +// This function generates prediction for multiple blocks, between which +// discontinuity around boundary is reduced by smoothing masks. The basic +// smoothing mask is a soft step function along horz/vert direction. In more +// complicated case when a block is split into 4 subblocks, the basic mask is +// first applied to neighboring subblocks (2 pairs) in horizontal direction and +// then applied to the 2 masked prediction mentioned above in vertical direction +// If the block is split into more than one level, at every stage, masked +// prediction is stored in dst_buf[] passed from higher level. +static void predict_sb_complex(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int mi_row, int mi_col, + int mi_row_top, int mi_col_top, + int output_enabled, BLOCK_SIZE bsize, + BLOCK_SIZE top_bsize, + uint8_t *dst_buf[3], int dst_stride[3], + PC_TREE *pc_tree) { + VP10_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; + + const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + + int i, ctx; + uint8_t *dst_buf1[3], *dst_buf2[3], *dst_buf3[3]; + DECLARE_ALIGNED(16, uint8_t, + tmp_buf1[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); + DECLARE_ALIGNED(16, uint8_t, + tmp_buf2[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); + DECLARE_ALIGNED(16, uint8_t, + tmp_buf3[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); + int dst_stride1[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; + int dst_stride2[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; + int dst_stride3[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + int 
len = sizeof(uint16_t); + dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); + dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAXTXLEN * MAXTXLEN * len); + dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN * len); + dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); + dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAXTXLEN * MAXTXLEN * len); + dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN * len); + dst_buf3[0] = CONVERT_TO_BYTEPTR(tmp_buf3); + dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAXTXLEN * MAXTXLEN * len); + dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN * len); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + dst_buf1[0] = tmp_buf1; + dst_buf1[1] = tmp_buf1 + MAXTXLEN * MAXTXLEN; + dst_buf1[2] = tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN; + dst_buf2[0] = tmp_buf2; + dst_buf2[1] = tmp_buf2 + MAXTXLEN * MAXTXLEN; + dst_buf2[2] = tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN; + dst_buf3[0] = tmp_buf3; + dst_buf3[1] = tmp_buf3 + MAXTXLEN * MAXTXLEN; + dst_buf3[2] = tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN; +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + if (bsize >= BLOCK_8X8) { + ctx = partition_plane_context(xd, mi_row, mi_col, bsize); + subsize = get_subsize(bsize, pc_tree->partitioning); + } else { + ctx = 0; + subsize = BLOCK_4X4; + } + partition = partition_lookup[bsl][subsize]; + if (output_enabled && bsize != BLOCK_4X4 && bsize < top_bsize) + cm->counts.partition[ctx][partition]++; + + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + } + + switch (partition) { + case PARTITION_NONE: + assert(bsize < top_bsize); + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + bsize, top_bsize, bsize, output_enabled, 0, 0); + extend_all(cpi, td, tile, 0, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, 
dst_buf, dst_stride); + break; + case PARTITION_HORZ: + if (bsize == BLOCK_8X8) { + // Fisrt half + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf, dst_stride); + + // Second half + predict_b_extend(cpi, td, tile, 2, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf1, dst_stride1, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 1); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 2, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf1, dst_stride1); + + // Smooth + xd->plane[0].dst.buf = dst_buf[0]; + xd->plane[0].dst.stride = dst_stride[0]; + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[0], dst_stride[0], + dst_buf1[0], dst_stride1[0], + &xd->plane[0], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_HORZ, 0); + } else { + // First half + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + subsize, top_bsize, subsize, output_enabled, 0, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf, dst_stride); + else + extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf, dst_stride, 0); + + if (mi_row + hbs < cm->mi_rows) { + // Second half + predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col, + mi_row + hbs, mi_col, mi_row_top, mi_col_top, + dst_buf1, dst_stride1, subsize, top_bsize, subsize, + output_enabled, 0, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs, + mi_col, mi_row_top, mi_col_top, output_enabled, + dst_buf1, dst_stride1); + else + 
extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs, + mi_col, mi_row_top, mi_col_top, output_enabled, + dst_buf1, dst_stride1, 1); + + // Smooth + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + vp10_build_masked_inter_predictor_complex( + xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], + &xd->plane[i], mi_row, mi_col, mi_row_top, mi_col_top, + bsize, top_bsize, PARTITION_HORZ, i); + } + } + } + break; + case PARTITION_VERT: + if (bsize == BLOCK_8X8) { + // First half + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf, dst_stride); + + // Second half + predict_b_extend(cpi, td, tile, 1, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf1, dst_stride1, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 1); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 1, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf1, dst_stride1); + + // Smooth + xd->plane[0].dst.buf = dst_buf[0]; + xd->plane[0].dst.stride = dst_stride[0]; + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[0], dst_stride[0], + dst_buf1[0], dst_stride1[0], + &xd->plane[0], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_VERT, 0); + } else { + // bsize: not important, not useful + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + subsize, top_bsize, subsize, output_enabled, 0, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf, dst_stride); + else + extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, 
mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf, dst_stride, 3); + + + if (mi_col + hbs < cm->mi_cols) { + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + hbs, + mi_row, mi_col + hbs, mi_row_top, mi_col_top, + dst_buf1, dst_stride1, subsize, top_bsize, subsize, + output_enabled, 0, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, + mi_col + hbs, mi_row_top, mi_col_top, output_enabled, + dst_buf1, dst_stride1); + else + extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, + mi_col + hbs, mi_row_top, mi_col_top, output_enabled, + dst_buf1, dst_stride1, 2); + + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + vp10_build_masked_inter_predictor_complex( + xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], + &xd->plane[i], mi_row, mi_col, mi_row_top, mi_col_top, + bsize, top_bsize, PARTITION_VERT, i); + } + } + } + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) { + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 0); + predict_b_extend(cpi, td, tile, 1, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf1, dst_stride1, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 1); + predict_b_extend(cpi, td, tile, 2, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf2, dst_stride2, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 1); + predict_b_extend(cpi, td, tile, 3, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf3, dst_stride3, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 1); + + if (bsize < top_bsize) { + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf, dst_stride); + extend_all(cpi, td, tile, 1, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf1, 
dst_stride1); + extend_all(cpi, td, tile, 2, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf2, dst_stride2); + extend_all(cpi, td, tile, 3, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf3, dst_stride3); + } + } else { + predict_sb_complex(cpi, td, tile, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, subsize, + top_bsize, dst_buf, dst_stride, + pc_tree->split[0]); + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) + predict_sb_complex(cpi, td, tile, mi_row, mi_col + hbs, + mi_row_top, mi_col_top, output_enabled, subsize, + top_bsize, dst_buf1, dst_stride1, + pc_tree->split[1]); + if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) + predict_sb_complex(cpi, td, tile, mi_row + hbs, mi_col, + mi_row_top, mi_col_top, output_enabled, subsize, + top_bsize, dst_buf2, dst_stride2, + pc_tree->split[2]); + if (mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) + predict_sb_complex(cpi, td, tile, mi_row + hbs, mi_col + hbs, + mi_row_top, mi_col_top, output_enabled, subsize, + top_bsize, dst_buf3, dst_stride3, + pc_tree->split[3]); + } + for (i = 0; i < MAX_MB_PLANE; i++) { + if (bsize == BLOCK_8X8 && i != 0) + continue; // Skip <4x4 chroma smoothing + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) { + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[i], + dst_stride[i], + dst_buf1[i], + dst_stride1[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_VERT, i); + if (mi_row + hbs < cm->mi_rows) { + vp10_build_masked_inter_predictor_complex(xd, + dst_buf2[i], + dst_stride2[i], + dst_buf3[i], + dst_stride3[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_VERT, i); + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[i], + dst_stride[i], + dst_buf2[i], + dst_stride2[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + 
PARTITION_HORZ, i); + } + } else if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) { + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[i], + dst_stride[i], + dst_buf2[i], + dst_stride2[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_HORZ, i); + } + } + break; + default: + assert(0); + } + + + if (bsize < top_bsize && (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)) + update_partition_context(xd, mi_row, mi_col, subsize, bsize); +} + +static void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize, + int *tmp_rate, int64_t *tmp_dist, +#if CONFIG_EXT_TX + TX_TYPE *best_tx, +#endif // CONFIG_EXT_TX + PC_TREE *pc_tree) { + VP10_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; + int plane, pnskip, skippable, skippable_uv, rate_uv, this_rate, + base_rate = *tmp_rate; + int64_t sse, pnsse, sse_uv, this_dist, dist_uv; + uint8_t *dst_buf[3]; + int dst_stride[3]; + TX_SIZE tx_size; +#if CONFIG_EXT_TX + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + TX_TYPE tx_type, best_tx_nostx = xd->mi[0]->mbmi.tx_type; + int ext_tx_set; + int tmp_rate_tx = 0, skip_tx = 0; + int64_t tmp_dist_tx = 0, rd_tx, bestrd_tx = INT64_MAX; + uint8_t tmp_zcoeff_blk = 0; +#endif // CONFIG_EXT_TX + + update_state_sb_supertx(cpi, td, tile, mi_row, mi_col, bsize, 0, pc_tree); + vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), + mi_row, mi_col); + for (plane = 0; plane < MAX_MB_PLANE; plane++) { + dst_buf[plane] = xd->plane[plane].dst.buf; + dst_stride[plane] = xd->plane[plane].dst.stride; + } + predict_sb_complex(cpi, td, tile, mi_row, mi_col, mi_row, mi_col, + 0, bsize, bsize, dst_buf, dst_stride, pc_tree); + + set_offsets(cpi, tile, x, mi_row, mi_col, bsize); +#if CONFIG_EXT_TX + *best_tx = DCT_DCT; +#endif + + // chroma + skippable_uv = 1; + rate_uv = 0; + dist_uv = 0; + sse_uv = 0; + for (plane = 1; plane < 
MAX_MB_PLANE; ++plane) { + tx_size = max_txsize_lookup[bsize]; + tx_size = get_uv_tx_size_impl(tx_size, bsize, + cm->subsampling_x, cm->subsampling_y); + vp10_subtract_plane(x, bsize, plane); + vp10_txfm_rd_in_plane_supertx(x, +#if CONFIG_VAR_TX + cpi, +#endif + &this_rate, &this_dist, &pnskip, &pnsse, + INT64_MAX, plane, bsize, tx_size, 0); + rate_uv += this_rate; + dist_uv += this_dist; + sse_uv += pnsse; + skippable_uv &= pnskip; + } + + // luma + tx_size = max_txsize_lookup[bsize]; + vp10_subtract_plane(x, bsize, 0); +#if CONFIG_EXT_TX + ext_tx_set = get_ext_tx_set(tx_size, bsize, 1); + for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) { + if (!ext_tx_used_inter[ext_tx_set][tx_type]) + continue; + mbmi->tx_type = tx_type; + if (ext_tx_set == 1 && + mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX && + *best_tx == DCT_DCT) { + tx_type = IDTX - 1; + break; + } + vp10_txfm_rd_in_plane_supertx(x, +#if CONFIG_VAR_TX + cpi, +#endif + &this_rate, &this_dist, &pnskip, + &pnsse, INT64_MAX, 0, bsize, tx_size, 0); + if (get_ext_tx_types(tx_size, bsize, 1) > 1 && + !xd->lossless[xd->mi[0]->mbmi.segment_id] && + this_rate != INT_MAX) { + if (ext_tx_set > 0) + this_rate += cpi->inter_tx_type_costs[ext_tx_set] + [mbmi->tx_size][mbmi->tx_type]; + } + *tmp_rate = rate_uv + this_rate; + *tmp_dist = dist_uv + this_dist; + sse = sse_uv + pnsse; + skippable = skippable_uv && pnskip; + if (skippable) { + *tmp_rate = vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1); + x->skip = 1; + } else { + if (RDCOST(x->rdmult, x->rddiv, *tmp_rate, *tmp_dist) + < RDCOST(x->rdmult, x->rddiv, 0, sse)) { + *tmp_rate += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0); + x->skip = 0; + } else { + *tmp_dist = sse; + *tmp_rate = vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1); + x->skip = 1; + } + } + *tmp_rate += base_rate; + rd_tx = RDCOST(x->rdmult, x->rddiv, *tmp_rate, *tmp_dist); + if (rd_tx < bestrd_tx * 0.99 || tx_type == DCT_DCT) { + *best_tx = tx_type; + bestrd_tx = rd_tx; + tmp_rate_tx = 
*tmp_rate; + tmp_dist_tx = *tmp_dist; + skip_tx = x->skip; + tmp_zcoeff_blk = x->zcoeff_blk[tx_size][0]; + } + } + x->zcoeff_blk[tx_size][0] = tmp_zcoeff_blk; + *tmp_rate = tmp_rate_tx; + *tmp_dist = tmp_dist_tx; + x->skip = skip_tx; + xd->mi[0]->mbmi.tx_type = best_tx_nostx; + +#else // CONFIG_EXT_TX + + vp10_txfm_rd_in_plane_supertx(x, +#if CONFIG_VAR_TX + cpi, +#endif + &this_rate, &this_dist, &pnskip, &pnsse, + INT64_MAX, 0, bsize, tx_size, 0); + *tmp_rate = rate_uv + this_rate; + *tmp_dist = dist_uv + this_dist; + sse = sse_uv + pnsse; + skippable = skippable_uv && pnskip; + if (skippable) { + *tmp_rate = vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1); + x->skip = 1; + } else { + if (RDCOST(x->rdmult, x->rddiv, *tmp_rate, *tmp_dist) + < RDCOST(x->rdmult, x->rddiv, 0, sse)) { + *tmp_rate += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0); + x->skip = 0; + } else { + *tmp_dist = sse; + *tmp_rate = vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1); + x->skip = 1; + } + } + *tmp_rate += base_rate; +#endif // CONFIG_EXT_TX } +#endif // CONFIG_SUPERTX diff --cc vp10/encoder/encoder.h index 149e21aa3,bd6a00932..707255d66 --- a/vp10/encoder/encoder.h +++ b/vp10/encoder/encoder.h @@@ -480,22 -462,13 +480,24 @@@ typedef struct VP10_COMP int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES]; int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS]; int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES]; + int palette_y_size_cost[PALETTE_BLOCK_SIZES][PALETTE_SIZES]; + int palette_uv_size_cost[PALETTE_BLOCK_SIZES][PALETTE_SIZES]; + int palette_y_color_cost[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS] + [PALETTE_COLORS]; + int palette_uv_color_cost[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS] + [PALETTE_COLORS]; + + int multi_arf_allowed; + int multi_arf_enabled; + int multi_arf_last_grp_enabled; - +#if CONFIG_EXT_TX + int inter_tx_type_costs[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES]; + int intra_tx_type_costs[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES] + 
[TX_TYPES]; ++#else + int intra_tx_type_costs[EXT_TX_SIZES][TX_TYPES][TX_TYPES]; + int inter_tx_type_costs[EXT_TX_SIZES][TX_TYPES]; +#endif // CONFIG_EXT_TX - - int multi_arf_allowed; - int multi_arf_enabled; - int multi_arf_last_grp_enabled; - #if CONFIG_VP9_TEMPORAL_DENOISING VP9_DENOISER denoiser; #endif diff --cc vp10/encoder/rd.c index a1fd00d53,f4fdb2417..5dcfa55d1 --- a/vp10/encoder/rd.c +++ b/vp10/encoder/rd.c @@@ -87,44 -84,17 +87,56 @@@ static void fill_mode_costs(VP10_COMP * vp10_cost_tokens(cpi->switchable_interp_costs[i], fc->switchable_interp_prob[i], vp10_switchable_interp_tree); + for (i = 0; i < PALETTE_BLOCK_SIZES; ++i) { + vp10_cost_tokens(cpi->palette_y_size_cost[i], + vp10_default_palette_y_size_prob[i], + vp10_palette_size_tree); + vp10_cost_tokens(cpi->palette_uv_size_cost[i], + vp10_default_palette_uv_size_prob[i], + vp10_palette_size_tree); + } + + for (i = 0; i < PALETTE_MAX_SIZE - 1; ++i) + for (j = 0; j < PALETTE_COLOR_CONTEXTS; ++j) { + vp10_cost_tokens(cpi->palette_y_color_cost[i][j], + vp10_default_palette_y_color_prob[i][j], + vp10_palette_color_tree[i]); + vp10_cost_tokens(cpi->palette_uv_color_cost[i][j], + vp10_default_palette_uv_color_prob[i][j], + vp10_palette_color_tree[i]); + } +#if CONFIG_EXT_TX + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + int s; + for (s = 1; s < EXT_TX_SETS_INTER; ++s) { + if (use_inter_ext_tx_for_txsize[s][i]) { + vp10_cost_tokens(cpi->inter_tx_type_costs[s][i], + fc->inter_ext_tx_prob[s][i], + vp10_ext_tx_inter_tree[s]); + } + } + for (s = 1; s < EXT_TX_SETS_INTRA; ++s) { + if (use_intra_ext_tx_for_txsize[s][i]) { + for (j = 0; j < INTRA_MODES; ++j) + vp10_cost_tokens(cpi->intra_tx_type_costs[s][i][j], + fc->intra_ext_tx_prob[s][i][j], + vp10_ext_tx_intra_tree[s]); + } + } + } ++#else + for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { + for (j = 0; j < TX_TYPES; ++j) + vp10_cost_tokens(cpi->intra_tx_type_costs[i][j], + fc->intra_ext_tx_prob[i][j], + vp10_ext_tx_tree); + } + for (i = TX_4X4; i < EXT_TX_SIZES; 
++i) { + vp10_cost_tokens(cpi->inter_tx_type_costs[i], + fc->inter_ext_tx_prob[i], + vp10_ext_tx_tree); + } +#endif // CONFIG_EXT_TX } static void fill_token_costs(vp10_coeff_cost *c, diff --cc vp10/encoder/rdopt.c index d457199cc,90a716d2c..ca978ba32 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@@ -81,10 -54,8 +81,13 @@@ #define MIN_EARLY_TERM_INDEX 3 #define NEW_MV_DISCOUNT_FACTOR 8 +#if CONFIG_EXT_TX +const double ext_tx_th = 0.98; ++#else + const double ext_tx_th = 0.99; +#endif + + typedef struct { PREDICTION_MODE mode; MV_REFERENCE_FRAME ref_frame[2]; @@@ -790,109 -599,57 +793,152 @@@ static void choose_largest_tx_size(VP10 const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode]; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; - #if CONFIG_EXT_TX - TX_TYPE tx_type, best_tx_type = DCT_DCT; int r, s; int64_t d, psse, this_rd, best_rd = INT64_MAX; vpx_prob skip_prob = vp10_get_skip_prob(cm, xd); int s0 = vp10_cost_bit(skip_prob, 0); int s1 = vp10_cost_bit(skip_prob, 1); ++#if CONFIG_EXT_TX + int ext_tx_set; - const int is_inter = is_inter_block(mbmi); +#endif // CONFIG_EXT_TX + const int is_inter = is_inter_block(mbmi); mbmi->tx_size = VPXMIN(max_tx_size, largest_tx_size); + +#if CONFIG_EXT_TX + ext_tx_set = get_ext_tx_set(mbmi->tx_size, bs, is_inter); + + if (is_inter && + get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1 && + !xd->lossless[mbmi->segment_id]) { + for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) { + if (is_inter) { + if (!ext_tx_used_inter[ext_tx_set][tx_type]) + continue; + } else { + if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) { - if (tx_type != intra_mode_to_tx_type_lookup[mbmi->mode]) ++ if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) + continue; + } + if (!ext_tx_used_intra[ext_tx_set][tx_type]) + continue; + } + + mbmi->tx_type = tx_type; + if (ext_tx_set == 1 && + mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX && + best_tx_type == DCT_DCT) { + tx_type = IDTX - 1; + 
continue; + } + + txfm_rd_in_plane(x, +#if CONFIG_VAR_TX + cpi, +#endif + &r, &d, &s, + &psse, ref_best_rd, 0, bs, mbmi->tx_size, + cpi->sf.use_fast_coef_costing); + + if (r == INT_MAX) + continue; + if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1) { + if (is_inter) { + if (ext_tx_set > 0) + r += cpi->inter_tx_type_costs[ext_tx_set] + [mbmi->tx_size][mbmi->tx_type]; + } else { + if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX) + r += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size] + [mbmi->mode][mbmi->tx_type]; + } + } + + if (s) + this_rd = RDCOST(x->rdmult, x->rddiv, s1, psse); + else + this_rd = RDCOST(x->rdmult, x->rddiv, r + s0, d); + if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] && !s) + this_rd = VPXMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, psse)); + + if (this_rd < ((best_tx_type == DCT_DCT) ? ext_tx_th : 1) * best_rd) { + best_rd = this_rd; + best_tx_type = mbmi->tx_type; + } + } + } + - mbmi->tx_type = best_tx_type; ++#else // CONFIG_EXT_TX + if (mbmi->tx_size < TX_32X32 && + !xd->lossless[mbmi->segment_id]) { + for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) { + mbmi->tx_type = tx_type; - txfm_rd_in_plane(x, &r, &d, &s, ++ txfm_rd_in_plane(x, ++#if CONFIG_VAR_TX ++ cpi, ++#endif ++ &r, &d, &s, + &psse, ref_best_rd, 0, bs, mbmi->tx_size, + cpi->sf.use_fast_coef_costing); + if (r == INT_MAX) + continue; + if (is_inter) + r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type]; + else + r += cpi->intra_tx_type_costs[mbmi->tx_size] + [intra_mode_to_tx_type_context[mbmi->mode]] + [mbmi->tx_type]; + if (s) + this_rd = RDCOST(x->rdmult, x->rddiv, s1, psse); + else + this_rd = RDCOST(x->rdmult, x->rddiv, r + s0, d); + if (is_inter && !xd->lossless[mbmi->segment_id] && !s) + this_rd = VPXMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, psse)); + + if (this_rd < ((best_tx_type == DCT_DCT) ? 
ext_tx_th : 1) * best_rd) { + best_rd = this_rd; + best_tx_type = mbmi->tx_type; + } + } + } +#endif // CONFIG_EXT_TX + mbmi->tx_type = best_tx_type; - txfm_rd_in_plane(x, rate, distortion, skip, + + txfm_rd_in_plane(x, +#if CONFIG_VAR_TX + cpi, +#endif + rate, distortion, skip, sse, ref_best_rd, 0, bs, mbmi->tx_size, cpi->sf.use_fast_coef_costing); + +#if CONFIG_EXT_TX + if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1 && + !xd->lossless[mbmi->segment_id] && *rate != INT_MAX) { + int ext_tx_set = get_ext_tx_set(mbmi->tx_size, bs, is_inter); + if (is_inter) { + if (ext_tx_set > 0) + *rate += cpi->inter_tx_type_costs[ext_tx_set][mbmi->tx_size] + [mbmi->tx_type]; + } else { + if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX) + *rate += + cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size] + [mbmi->mode][mbmi->tx_type]; + } + } ++#else + if (mbmi->tx_size < TX_32X32 && !xd->lossless[mbmi->segment_id] && + *rate != INT_MAX) { + if (is_inter) + *rate += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type]; + else + *rate += cpi->intra_tx_type_costs[mbmi->tx_size] + [intra_mode_to_tx_type_context[mbmi->mode]] + [mbmi->tx_type]; + } +#endif // CONFIG_EXT_TX } static void choose_smallest_tx_size(VP10_COMP *cpi, MACROBLOCK *x, @@@ -935,11 -688,8 +981,11 @@@ static void choose_tx_size_from_rd(VP10 TX_SIZE best_tx = max_tx_size; int start_tx, end_tx; const int tx_select = cm->tx_mode == TX_MODE_SELECT; - #if CONFIG_EXT_TX TX_TYPE tx_type, best_tx_type = DCT_DCT; + const int is_inter = is_inter_block(mbmi); ++#if CONFIG_EXT_TX + int ext_tx_set; +#endif // CONFIG_EXT_TX - const int is_inter = is_inter_block(mbmi); const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs); assert(skip_prob > 0); @@@ -974,55 -722,23 +1018,69 @@@ r_tx_size += vp10_cost_one(tx_probs[m]); } +#if CONFIG_EXT_TX + ext_tx_set = get_ext_tx_set(n, bs, is_inter); + if (is_inter) { + if (!ext_tx_used_inter[ext_tx_set][tx_type]) + continue; + } else { + if (!ALLOW_INTRA_EXT_TX && bs >= 
BLOCK_8X8) { - if (tx_type != intra_mode_to_tx_type_lookup[mbmi->mode]) ++ if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) + continue; + } + if (!ext_tx_used_intra[ext_tx_set][tx_type]) + continue; + } + mbmi->tx_type = tx_type; + if (ext_tx_set == 1 && + mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX && + best_tx_type == DCT_DCT) { + tx_type = IDTX - 1; + break; + } + txfm_rd_in_plane(x, +#if CONFIG_VAR_TX + cpi, +#endif + &r, &d, &s, + &sse, ref_best_rd, 0, bs, n, + cpi->sf.use_fast_coef_costing); + if (get_ext_tx_types(n, bs, is_inter) > 1 && + !xd->lossless[xd->mi[0]->mbmi.segment_id] && + r != INT_MAX) { + if (is_inter) { + if (ext_tx_set > 0) + r += cpi->inter_tx_type_costs[ext_tx_set] + [mbmi->tx_size][mbmi->tx_type]; + } else { + if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX) + r += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size] + [mbmi->mode][mbmi->tx_type]; + } + } +#else // CONFIG_EXT_TX + if (n >= TX_32X32 && tx_type != DCT_DCT) { + continue; + } + mbmi->tx_type = tx_type; - txfm_rd_in_plane(x, &r, &d, &s, + txfm_rd_in_plane(x, +#if CONFIG_VAR_TX + cpi, +#endif + &r, &d, &s, &sse, ref_best_rd, 0, bs, n, cpi->sf.use_fast_coef_costing); + if (n < TX_32X32 && + !xd->lossless[xd->mi[0]->mbmi.segment_id] && + r != INT_MAX) { + if (is_inter) + r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type]; + else + r += cpi->intra_tx_type_costs[mbmi->tx_size] + [intra_mode_to_tx_type_context[mbmi->mode]] + [mbmi->tx_type]; + } +#endif // CONFIG_EXT_TX if (r == INT_MAX) continue; @@@ -1066,26 -776,18 +1118,20 @@@ *rate = r; *skip = s; *psse = sse; - #if CONFIG_EXT_TX best_tx_type = mbmi->tx_type; - #endif // CONFIG_EXT_TX } } - #if CONFIG_EXT_TX } - #endif // CONFIG_EXT_TX mbmi->tx_size = best_tx; - #if CONFIG_EXT_TX mbmi->tx_type = best_tx_type; - if (mbmi->tx_size >= TX_32X32) - assert(mbmi->tx_type == DCT_DCT); - txfm_rd_in_plane(x, &r, &d, &s, + txfm_rd_in_plane(x, +#if CONFIG_VAR_TX + cpi, +#endif + &r, &d, &s, &sse, ref_best_rd, 0, bs, best_tx, 
cpi->sf.use_fast_coef_costing); - #endif // CONFIG_EXT_TX } static void super_block_yrd(VP10_COMP *cpi, MACROBLOCK *x, int *rate, @@@ -1930,797 -1140,38 +1976,789 @@@ static int64_t rd_pick_intra_sby_mode(V int this_rate, this_rate_tokenonly, s; int64_t this_distortion, this_rd; TX_SIZE best_tx = TX_4X4; +#if CONFIG_EXT_INTRA + EXT_INTRA_MODE_INFO ext_intra_mode_info; + int is_directional_mode, rate_overhead, best_angle_delta = 0; + uint8_t directional_mode_skip_mask[INTRA_MODES]; + const int src_stride = x->plane[0].src.stride; + const uint8_t *src = x->plane[0].src.buf; + double hist[DIRECTIONAL_MODES]; +#endif // CONFIG_EXT_INTRA - #if CONFIG_EXT_TX TX_TYPE best_tx_type = DCT_DCT; - #endif // CONFIG_EXT_TX int *bmode_costs; + PALETTE_MODE_INFO palette_mode_info; + uint8_t *best_palette_color_map = cpi->common.allow_screen_content_tools ? + x->palette_buffer->best_palette_color_map : NULL; + const int rows = 4 * num_4x4_blocks_high_lookup[bsize]; + const int cols = 4 * num_4x4_blocks_wide_lookup[bsize]; + int palette_ctx = 0; const MODE_INFO *above_mi = xd->above_mi; const MODE_INFO *left_mi = xd->left_mi; const PREDICTION_MODE A = vp10_above_block_mode(mic, above_mi, 0); const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, 0); bmode_costs = cpi->y_mode_costs[A][L]; - memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm)); +#if CONFIG_EXT_INTRA + ext_intra_mode_info.use_ext_intra_mode[0] = 0; + mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] = 0; + mic->mbmi.angle_delta[0] = 0; + memset(directional_mode_skip_mask, 0, + sizeof(directional_mode_skip_mask[0]) * INTRA_MODES); +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + highbd_angle_estimation(src, src_stride, rows, cols, hist); + else +#endif + angle_estimation(src, src_stride, rows, cols, hist); + + for (mode = 0; mode < INTRA_MODES; ++mode) { + if (mode != DC_PRED && mode != TM_PRED) { + int index = get_angle_index((double)mode_to_angle_map[mode]); + double 
score, weight = 1.0; + score = hist[index]; + if (index > 0) { + score += hist[index - 1] * 0.5; + weight += 0.5; + } + if (index < DIRECTIONAL_MODES - 1) { + score += hist[index + 1] * 0.5; + weight += 0.5; + } + score /= weight; + if (score < ANGLE_SKIP_THRESH) + directional_mode_skip_mask[mode] = 1; + } + } +#endif // CONFIG_EXT_INTRA + memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm)); + palette_mode_info.palette_size[0] = 0; + mic->mbmi.palette_mode_info.palette_size[0] = 0; + if (above_mi) + palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0); + if (left_mi) + palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0); + + /* Y Search for intra prediction mode */ + for (mode = DC_PRED; mode <= TM_PRED; ++mode) { + mic->mbmi.mode = mode; +#if CONFIG_EXT_INTRA + is_directional_mode = (mode != DC_PRED && mode != TM_PRED); + if (is_directional_mode && directional_mode_skip_mask[mode]) + continue; + if (is_directional_mode) { + rate_overhead = bmode_costs[mode] + + write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0); + this_rate_tokenonly = INT_MAX; + this_rd = + rd_pick_intra_angle_sby(cpi, x, &this_rate, &this_rate_tokenonly, + &this_distortion, &s, bsize, rate_overhead, + best_rd); + } else { + mic->mbmi.angle_delta[0] = 0; + super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, + &s, NULL, bsize, best_rd); + } +#endif // CONFIG_EXT_INTRA + super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, + &s, NULL, bsize, best_rd); + + if (this_rate_tokenonly == INT_MAX) + continue; + + this_rate = this_rate_tokenonly + bmode_costs[mode]; + if (cpi->common.allow_screen_content_tools && mode == DC_PRED) + this_rate += + vp10_cost_bit(vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8] + [palette_ctx], 0); +#if CONFIG_EXT_INTRA + if (mode == DC_PRED && ALLOW_FILTER_INTRA_MODES) + this_rate += vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 0); + if (is_directional_mode) + this_rate += write_uniform_cost(2 * 
MAX_ANGLE_DELTAS + 1, + MAX_ANGLE_DELTAS + + mic->mbmi.angle_delta[0]); +#endif // CONFIG_EXT_INTRA + this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); + + if (this_rd < best_rd) { + mode_selected = mode; + best_rd = this_rd; + best_tx = mic->mbmi.tx_size; +#if CONFIG_EXT_INTRA + best_angle_delta = mic->mbmi.angle_delta[0]; +#endif // CONFIG_EXT_INTRA - #if CONFIG_EXT_TX + best_tx_type = mic->mbmi.tx_type; - #endif // CONFIG_EXT_TX + *rate = this_rate; + *rate_tokenonly = this_rate_tokenonly; + *distortion = this_distortion; + *skippable = s; + } + } + + if (cpi->common.allow_screen_content_tools) + rd_pick_palette_intra_sby(cpi, x, bsize, palette_ctx, bmode_costs[DC_PRED], + &palette_mode_info, best_palette_color_map, + &best_tx, &mode_selected, &best_rd); + +#if CONFIG_EXT_INTRA + if (!palette_mode_info.palette_size[0] > 0 && ALLOW_FILTER_INTRA_MODES) { + if (rd_pick_ext_intra_sby(cpi, x, rate, rate_tokenonly, distortion, + skippable, bsize, bmode_costs[DC_PRED], + &best_rd)) { + mode_selected = mic->mbmi.mode; + best_tx = mic->mbmi.tx_size; + ext_intra_mode_info = mic->mbmi.ext_intra_mode_info; - #if CONFIG_EXT_TX + best_tx_type = mic->mbmi.tx_type; - #endif // CONFIG_EXT_TX + } + } + + mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] = + ext_intra_mode_info.use_ext_intra_mode[0]; + if (ext_intra_mode_info.use_ext_intra_mode[0]) { + mic->mbmi.ext_intra_mode_info.ext_intra_mode[0] = + ext_intra_mode_info.ext_intra_mode[0]; + } +#endif // CONFIG_EXT_INTRA + + mic->mbmi.mode = mode_selected; + mic->mbmi.tx_size = best_tx; +#if CONFIG_EXT_INTRA + mic->mbmi.angle_delta[0] = best_angle_delta; +#endif // CONFIG_EXT_INTRA - #if CONFIG_EXT_TX + mic->mbmi.tx_type = best_tx_type; - #endif // CONFIG_EXT_TX + mic->mbmi.palette_mode_info.palette_size[0] = + palette_mode_info.palette_size[0]; + if (palette_mode_info.palette_size[0] > 0) { + memcpy(mic->mbmi.palette_mode_info.palette_colors, + palette_mode_info.palette_colors, + PALETTE_MAX_SIZE * 
sizeof(palette_mode_info.palette_colors[0])); + memcpy(xd->plane[0].color_index_map, best_palette_color_map, + rows * cols * sizeof(best_palette_color_map[0])); + } + + return best_rd; +} + +#if CONFIG_VAR_TX +static void tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, + int blk_row, int blk_col, int plane, int block, + int plane_bsize, int coeff_ctx, + int *rate, int64_t *dist, int64_t *bsse, int *skip) { + MACROBLOCKD *xd = &x->e_mbd; + const struct macroblock_plane *const p = &x->plane[plane]; + struct macroblockd_plane *const pd = &xd->plane[plane]; +#if CONFIG_VP9_HIGHBITDEPTH + const int ss_txfrm_size = tx_size << 1; + int64_t this_sse; + int shift = tx_size == TX_32X32 ? 0 : 2; + tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); +#endif + unsigned int tmp_sse = 0; + tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); + PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV; + TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); + const scan_order *const scan_order = + get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); + + BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size]; + int bh = 4 * num_4x4_blocks_wide_lookup[txm_bsize]; + int src_stride = p->src.stride; + uint8_t *src = &p->src.buf[4 * blk_row * src_stride + 4 * blk_col]; + uint8_t *dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col]; +#if CONFIG_VP9_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint16_t, rec_buffer_alloc_16[32 * 32]); + uint8_t *rec_buffer; +#else + DECLARE_ALIGNED(16, uint8_t, rec_buffer[32 * 32]); +#endif + + int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize]; + int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize]; + + if (xd->mb_to_bottom_edge < 0) + max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y); + if (xd->mb_to_right_edge < 0) + max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x); + + vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size, 
+ VP10_XFORM_QUANT_B); + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + rec_buffer = CONVERT_TO_BYTEPTR(rec_buffer_alloc_16); + vpx_highbd_convolve_copy(dst, pd->dst.stride, rec_buffer, 32, + NULL, 0, NULL, 0, bh, bh, xd->bd); + } else { + rec_buffer = (uint8_t *)rec_buffer_alloc_16; + vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, 32, + NULL, 0, NULL, 0, bh, bh); + } +#else + vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, 32, + NULL, 0, NULL, 0, bh, bh); +#endif + + if (blk_row + (bh >> 2) > max_blocks_high || + blk_col + (bh >> 2) > max_blocks_wide) { + int idx, idy; + unsigned int this_sse; + int blocks_height = VPXMIN(bh >> 2, max_blocks_high - blk_row); + int blocks_width = VPXMIN(bh >> 2, max_blocks_wide - blk_col); + for (idy = 0; idy < blocks_height; idy += 2) { + for (idx = 0; idx < blocks_width; idx += 2) { + cpi->fn_ptr[BLOCK_8X8].vf(src + 4 * idy * src_stride + 4 * idx, + src_stride, + rec_buffer + 4 * idy * 32 + 4 * idx, + 32, &this_sse); + tmp_sse += this_sse; + } + } + } else { + cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, 32, &tmp_sse); + } + +#if CONFIG_VP9_HIGHBITDEPTH + *dist += vp10_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, + &this_sse, xd->bd) >> shift; + *bsse += this_sse >> shift; +#else + *bsse += (int64_t)tmp_sse * 16; + + if (p->eobs[block] > 0) { + switch (tx_size) { + case TX_32X32: + vp10_inv_txfm_add_32x32(dqcoeff, rec_buffer, 32, p->eobs[block], + tx_type); + break; + case TX_16X16: + vp10_inv_txfm_add_16x16(dqcoeff, rec_buffer, 32, p->eobs[block], + tx_type); + break; + case TX_8X8: + vp10_inv_txfm_add_8x8(dqcoeff, rec_buffer, 32, p->eobs[block], + tx_type); + break; + case TX_4X4: + vp10_inv_txfm_add_4x4(dqcoeff, rec_buffer, 32, p->eobs[block], + tx_type, + xd->lossless[xd->mi[0]->mbmi.segment_id]); + break; + default: + assert(0 && "Invalid transform size"); + break; + } + + if ((bh >> 2) + blk_col > max_blocks_wide || + (bh >> 2) + blk_row > max_blocks_high) { + 
int idx, idy; + unsigned int this_sse; + int blocks_height = VPXMIN(bh >> 2, max_blocks_high - blk_row); + int blocks_width = VPXMIN(bh >> 2, max_blocks_wide - blk_col); + tmp_sse = 0; + for (idy = 0; idy < blocks_height; idy += 2) { + for (idx = 0; idx < blocks_width; idx += 2) { + cpi->fn_ptr[BLOCK_8X8].vf(src + 4 * idy * src_stride + 4 * idx, + src_stride, + rec_buffer + 4 * idy * 32 + 4 * idx, + 32, &this_sse); + tmp_sse += this_sse; + } + } + } else { + cpi->fn_ptr[txm_bsize].vf(src, src_stride, + rec_buffer, 32, &tmp_sse); + } + } + *dist += (int64_t)tmp_sse * 16; +#endif // CONFIG_VP9_HIGHBITDEPTH + + *rate += cost_coeffs(x, plane, block, coeff_ctx, tx_size, + scan_order->scan, scan_order->neighbors, 0); + *skip &= (p->eobs[block] == 0); +} + +static void select_tx_block(const VP10_COMP *cpi, MACROBLOCK *x, + int blk_row, int blk_col, int plane, int block, + TX_SIZE tx_size, BLOCK_SIZE plane_bsize, + ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl, + TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left, + int *rate, int64_t *dist, + int64_t *bsse, int *skip, + int64_t ref_best_rd, int *is_cost_valid) { + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + struct macroblock_plane *const p = &x->plane[plane]; + struct macroblockd_plane *const pd = &xd->plane[plane]; + int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 + + (blk_col >> (1 - pd->subsampling_x)); + int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize]; + int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize]; + int64_t this_rd = INT64_MAX; + ENTROPY_CONTEXT *pta = ta + blk_col; + ENTROPY_CONTEXT *ptl = tl + blk_row; + ENTROPY_CONTEXT stxa = 0, stxl = 0; + int coeff_ctx, i; + int ctx = txfm_partition_context(tx_above + (blk_col >> 1), + tx_left + (blk_row >> 1), tx_size); + + int64_t sum_dist = 0, sum_bsse = 0; + int64_t sum_rd = INT64_MAX; + int sum_rate = vp10_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 1); + int all_skip = 1; + int tmp_eob = 0; + int 
zero_blk_rate; + + if (ref_best_rd < 0) { + *is_cost_valid = 0; + return; + } + + switch (tx_size) { + case TX_4X4: + stxa = pta[0]; + stxl = ptl[0]; + break; + case TX_8X8: + stxa = !!*(const uint16_t *)&pta[0]; + stxl = !!*(const uint16_t *)&ptl[0]; + break; + case TX_16X16: + stxa = !!*(const uint32_t *)&pta[0]; + stxl = !!*(const uint32_t *)&ptl[0]; + break; + case TX_32X32: + stxa = !!*(const uint64_t *)&pta[0]; + stxl = !!*(const uint64_t *)&ptl[0]; + break; + default: + assert(0 && "Invalid transform size."); + break; + } + coeff_ctx = combine_entropy_contexts(stxa, stxl); + + if (xd->mb_to_bottom_edge < 0) + max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y); + if (xd->mb_to_right_edge < 0) + max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x); + + *rate = 0; + *dist = 0; + *bsse = 0; + *skip = 1; + + if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) + return; + + zero_blk_rate = + x->token_costs[tx_size][pd->plane_type][1][0][0][coeff_ctx][EOB_TOKEN]; + + if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) { + mbmi->inter_tx_size[tx_idx] = tx_size; + tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block, + plane_bsize, coeff_ctx, rate, dist, bsse, skip); + + if ((RDCOST(x->rdmult, x->rddiv, *rate, *dist) >= + RDCOST(x->rdmult, x->rddiv, zero_blk_rate, *bsse) || *skip == 1) && + !xd->lossless[mbmi->segment_id]) { + *rate = zero_blk_rate; + *dist = *bsse; + *skip = 1; + x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = 1; + p->eobs[block] = 0; + } else { + x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = 0; + *skip = 0; + } + + if (tx_size > TX_4X4) + *rate += vp10_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0); + this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *dist); + tmp_eob = p->eobs[block]; + } + + if (tx_size > TX_4X4) { + BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; + int bsl = b_height_log2_lookup[bsize]; + int sub_step = 1 << (2 * (tx_size - 1)); + int i; + int 
this_rate; + int64_t this_dist; + int64_t this_bsse; + int this_skip; + int this_cost_valid = 1; + int64_t tmp_rd = 0; + + --bsl; + for (i = 0; i < 4 && this_cost_valid; ++i) { + int offsetr = (i >> 1) << bsl; + int offsetc = (i & 0x01) << bsl; + select_tx_block(cpi, x, blk_row + offsetr, blk_col + offsetc, + plane, block + i * sub_step, tx_size - 1, + plane_bsize, ta, tl, tx_above, tx_left, + &this_rate, &this_dist, + &this_bsse, &this_skip, + ref_best_rd - tmp_rd, &this_cost_valid); + sum_rate += this_rate; + sum_dist += this_dist; + sum_bsse += this_bsse; + all_skip &= this_skip; + tmp_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + if (this_rd < tmp_rd) + break; + } + if (this_cost_valid) + sum_rd = tmp_rd; + } + + if (this_rd < sum_rd) { + int idx, idy; + for (i = 0; i < (1 << tx_size); ++i) + pta[i] = ptl[i] = !(tmp_eob == 0); + txfm_partition_update(tx_above + (blk_col >> 1), + tx_left + (blk_row >> 1), tx_size); + mbmi->inter_tx_size[tx_idx] = tx_size; + + for (idy = 0; idy < (1 << tx_size) / 2; ++idy) + for (idx = 0; idx < (1 << tx_size) / 2; ++idx) + mbmi->inter_tx_size[tx_idx + (idy << 3) + idx] = tx_size; + mbmi->tx_size = tx_size; + if (this_rd == INT64_MAX) + *is_cost_valid = 0; + x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = *skip; + } else { + *rate = sum_rate; + *dist = sum_dist; + *bsse = sum_bsse; + *skip = all_skip; + if (sum_rd == INT64_MAX) + *is_cost_valid = 0; + } +} + +static void inter_block_yrd(const VP10_COMP *cpi, MACROBLOCK *x, + int *rate, int64_t *distortion, int *skippable, + int64_t *sse, BLOCK_SIZE bsize, + int64_t ref_best_rd) { + MACROBLOCKD *const xd = &x->e_mbd; + int is_cost_valid = 1; + int64_t this_rd = 0; + + if (ref_best_rd < 0) + is_cost_valid = 0; + + *rate = 0; + *distortion = 0; + *sse = 0; + *skippable = 1; + + if (is_cost_valid) { + const struct macroblockd_plane *const pd = &xd->plane[0]; + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); + const int mi_width = 
num_4x4_blocks_wide_lookup[plane_bsize]; + const int mi_height = num_4x4_blocks_high_lookup[plane_bsize]; + BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]]; + int bh = num_4x4_blocks_wide_lookup[txb_size]; + int idx, idy; + int block = 0; + int step = 1 << (max_txsize_lookup[plane_bsize] * 2); + ENTROPY_CONTEXT ctxa[16], ctxl[16]; + TXFM_CONTEXT tx_above[8], tx_left[8]; + + int pnrate = 0, pnskip = 1; + int64_t pndist = 0, pnsse = 0; + + vp10_get_entropy_contexts(bsize, TX_4X4, pd, ctxa, ctxl); + memcpy(tx_above, xd->above_txfm_context, + sizeof(TXFM_CONTEXT) * (mi_width >> 1)); + memcpy(tx_left, xd->left_txfm_context, + sizeof(TXFM_CONTEXT) * (mi_height >> 1)); + + for (idy = 0; idy < mi_height; idy += bh) { + for (idx = 0; idx < mi_width; idx += bh) { + select_tx_block(cpi, x, idy, idx, 0, block, + max_txsize_lookup[plane_bsize], plane_bsize, + ctxa, ctxl, tx_above, tx_left, + &pnrate, &pndist, &pnsse, &pnskip, + ref_best_rd - this_rd, &is_cost_valid); + *rate += pnrate; + *distortion += pndist; + *sse += pnsse; + *skippable &= pnskip; + this_rd += VPXMIN(RDCOST(x->rdmult, x->rddiv, pnrate, pndist), + RDCOST(x->rdmult, x->rddiv, 0, pnsse)); + block += step; + } + } + } + + this_rd = VPXMIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion), + RDCOST(x->rdmult, x->rddiv, 0, *sse)); + if (this_rd > ref_best_rd) + is_cost_valid = 0; + + if (!is_cost_valid) { + // reset cost value + *rate = INT_MAX; + *distortion = INT64_MAX; + *sse = INT64_MAX; + *skippable = 0; + } +} + +#if CONFIG_EXT_TX +static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x, + int *rate, int64_t *distortion, int *skippable, + int64_t *sse, BLOCK_SIZE bsize, + int64_t ref_best_rd) { + const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; + const VP10_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + int64_t rd = INT64_MAX; + int64_t best_rd = INT64_MAX; + TX_TYPE tx_type, best_tx_type = DCT_DCT; + 
int ext_tx_set; + const int is_inter = is_inter_block(mbmi); + vpx_prob skip_prob = vp10_get_skip_prob(cm, xd); + int s0 = vp10_cost_bit(skip_prob, 0); + int s1 = vp10_cost_bit(skip_prob, 1); + TX_SIZE best_tx_size[64]; + TX_SIZE best_tx = TX_SIZES; + uint8_t best_blk_skip[256]; + const int n4 = 1 << (num_pels_log2_lookup[bsize] - 4); + int idx, idy; + + *distortion = INT64_MAX; + *rate = INT_MAX; + *skippable = 0; + *sse = INT64_MAX; + + ext_tx_set = get_ext_tx_set(max_tx_size, bsize, is_inter); + + for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) { + int this_rate = 0; + int this_skip = 1; + int64_t this_dist = 0; + int64_t this_sse = 0; + + if (is_inter) { + if (!ext_tx_used_inter[ext_tx_set][tx_type]) + continue; + } else { + if (!ALLOW_INTRA_EXT_TX && bsize >= BLOCK_8X8) { - if (tx_type != intra_mode_to_tx_type_lookup[mbmi->mode]) ++ if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) + continue; + } + if (!ext_tx_used_intra[ext_tx_set][tx_type]) + continue; + } + + mbmi->tx_type = tx_type; + + if (ext_tx_set == 1 && + mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX && + best_tx_type == DCT_DCT) { + tx_type = IDTX - 1; + break; + } + + inter_block_yrd(cpi, x, &this_rate, &this_dist, &this_skip, &this_sse, + bsize, ref_best_rd); + + if (get_ext_tx_types(max_tx_size, bsize, is_inter) > 1 && + !xd->lossless[xd->mi[0]->mbmi.segment_id] && + this_rate != INT_MAX) { + if (is_inter) { + if (ext_tx_set > 0) + this_rate += cpi->inter_tx_type_costs[ext_tx_set] + [max_tx_size][mbmi->tx_type]; + } else { + if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX) + this_rate += cpi->intra_tx_type_costs[ext_tx_set][max_tx_size] + [mbmi->mode][mbmi->tx_type]; + } + } + + if (this_rate == INT_MAX) + continue; + + if (this_skip) + rd = RDCOST(x->rdmult, x->rddiv, s1, this_sse); + else + rd = RDCOST(x->rdmult, x->rddiv, this_rate + s0, this_dist); + + if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !this_skip) + rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, 
this_sse)); + + if (rd < + (is_inter && best_tx_type == DCT_DCT ? ext_tx_th : 1) * + best_rd) { + best_rd = rd; + *distortion = this_dist; + *rate = this_rate; + *skippable = this_skip; + *sse = this_sse; + best_tx_type = mbmi->tx_type; + best_tx = mbmi->tx_size; + memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4); + for (idy = 0; idy < xd->n8_h; ++idy) + for (idx = 0; idx < xd->n8_w; ++idx) + best_tx_size[idy * 8 + idx] = mbmi->inter_tx_size[idy * 8 + idx]; + } + } + + mbmi->tx_type = best_tx_type; + for (idy = 0; idy < xd->n8_h; ++idy) + for (idx = 0; idx < xd->n8_w; ++idx) + mbmi->inter_tx_size[idy * 8 + idx] = best_tx_size[idy * 8 + idx]; + mbmi->tx_size = best_tx; + memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4); +} +#endif + +static void tx_block_rd(const VP10_COMP *cpi, MACROBLOCK *x, + int blk_row, int blk_col, int plane, int block, + TX_SIZE tx_size, BLOCK_SIZE plane_bsize, + ENTROPY_CONTEXT *above_ctx, ENTROPY_CONTEXT *left_ctx, + int *rate, int64_t *dist, int64_t *bsse, int *skip) { + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + struct macroblock_plane *const p = &x->plane[plane]; + struct macroblockd_plane *const pd = &xd->plane[plane]; + BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; + int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 + + (blk_col >> (1 - pd->subsampling_x)); + TX_SIZE plane_tx_size = plane ? 
+ get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], bsize, + 0, 0) : + mbmi->inter_tx_size[tx_idx]; + + int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize]; + int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize]; + + if (xd->mb_to_bottom_edge < 0) + max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y); + if (xd->mb_to_right_edge < 0) + max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x); + + if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) + return; + + if (tx_size == plane_tx_size) { + int coeff_ctx, i; + ENTROPY_CONTEXT *ta = above_ctx + blk_col; + ENTROPY_CONTEXT *tl = left_ctx + blk_row; + switch (tx_size) { + case TX_4X4: + break; + case TX_8X8: + ta[0] = !!*(const uint16_t *)&ta[0]; + tl[0] = !!*(const uint16_t *)&tl[0]; + break; + case TX_16X16: + ta[0] = !!*(const uint32_t *)&ta[0]; + tl[0] = !!*(const uint32_t *)&tl[0]; + break; + case TX_32X32: + ta[0] = !!*(const uint64_t *)&ta[0]; + tl[0] = !!*(const uint64_t *)&tl[0]; + break; + default: + assert(0 && "Invalid transform size."); + break; + } + coeff_ctx = combine_entropy_contexts(ta[0], tl[0]); + tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block, + plane_bsize, coeff_ctx, rate, dist, bsse, skip); + for (i = 0; i < (1 << tx_size); ++i) { + ta[i] = !(p->eobs[block] == 0); + tl[i] = !(p->eobs[block] == 0); + } + } else { + int bsl = b_width_log2_lookup[bsize]; + int step = 1 << (2 * (tx_size - 1)); + int i; + + assert(bsl > 0); + --bsl; + + for (i = 0; i < 4; ++i) { + int offsetr = (i >> 1) << bsl; + int offsetc = (i & 0x01) << bsl; + tx_block_rd(cpi, x, blk_row + offsetr, blk_col + offsetc, plane, + block + i * step, tx_size - 1, plane_bsize, + above_ctx, left_ctx, rate, dist, bsse, skip); + } + } +} + +// Return value 0: early termination triggered, no valid rd cost available; +// 1: rd cost values are valid. 
+static int inter_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x, + int *rate, int64_t *distortion, int *skippable, + int64_t *sse, BLOCK_SIZE bsize, + int64_t ref_best_rd) { + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + int plane; + int is_cost_valid = 1; + int64_t this_rd; + + if (ref_best_rd < 0) + is_cost_valid = 0; - /* Y Search for intra prediction mode */ - for (mode = DC_PRED; mode <= TM_PRED; mode++) { - mic->mbmi.mode = mode; + if (is_inter_block(mbmi) && is_cost_valid) { + int plane; + for (plane = 1; plane < MAX_MB_PLANE; ++plane) + vp10_subtract_plane(x, bsize, plane); + } - super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, - &s, NULL, bsize, best_rd); + *rate = 0; + *distortion = 0; + *sse = 0; + *skippable = 1; - if (this_rate_tokenonly == INT_MAX) - continue; + for (plane = 1; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); + const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize]; + const int mi_height = num_4x4_blocks_high_lookup[plane_bsize]; + BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]]; + int bh = num_4x4_blocks_wide_lookup[txb_size]; + int idx, idy; + int block = 0; + int step = 1 << (max_txsize_lookup[plane_bsize] * 2); + int pnrate = 0, pnskip = 1; + int64_t pndist = 0, pnsse = 0; + ENTROPY_CONTEXT ta[16], tl[16]; + + vp10_get_entropy_contexts(bsize, TX_4X4, pd, ta, tl); + + for (idy = 0; idy < mi_height; idy += bh) { + for (idx = 0; idx < mi_width; idx += bh) { + tx_block_rd(cpi, x, idy, idx, plane, block, + max_txsize_lookup[plane_bsize], plane_bsize, ta, tl, + &pnrate, &pndist, &pnsse, &pnskip); + block += step; + } + } - this_rate = this_rate_tokenonly + bmode_costs[mode]; - this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); + if (pnrate == INT_MAX) { + is_cost_valid = 0; + break; + } - if (this_rd < best_rd) { - 
mode_selected = mode; - best_rd = this_rd; - best_tx = mic->mbmi.tx_size; - best_tx_type = mic->mbmi.tx_type; - *rate = this_rate; - *rate_tokenonly = this_rate_tokenonly; - *distortion = this_distortion; - *skippable = s; + *rate += pnrate; + *distortion += pndist; + *sse += pnsse; + *skippable &= pnskip; + + this_rd = VPXMIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion), + RDCOST(x->rdmult, x->rddiv, 0, *sse)); + + if (this_rd > ref_best_rd) { + is_cost_valid = 0; + break; } } diff --cc vp10/encoder/subexp.h index bad23d654,091334f1f..64eb27531 --- a/vp10/encoder/subexp.h +++ b/vp10/encoder/subexp.h @@@ -36,9 -36,9 +36,8 @@@ int vp10_prob_diff_update_savings_searc vpx_prob *bestp, vpx_prob upd, int stepsize); - int vp10_cond_prob_diff_update_savings(vpx_prob *oldp, const unsigned int ct[2]); - #ifdef __cplusplus } // extern "C" #endif