From: Debargha Mukherjee Date: Fri, 20 Nov 2015 00:51:16 +0000 (-0800) Subject: Super transform - ported from nextgen branch X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3787b17439d00d3684646e97c18d231860dae8b9;p=libvpx Super transform - ported from nextgen branch Various additional changes were made to make the experiment compatible with misc_fixes. derflr: +0.979% hevcmr: +0.865% Speed-wise with --enable-supertx the encoder is only about 10% slower than without. Decoding impact is about 30% slowdown. Note this does not work with ext-tx or var-tx yet. That is a TODO. Change-Id: If25af4241a7a9efbd28f58eda3c4f044c7a7ef4b --- diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h index fc7138715..66e29d41b 100644 --- a/vp10/common/blockd.h +++ b/vp10/common/blockd.h @@ -45,6 +45,8 @@ typedef enum { #define IsInterpolatingFilter(filter) (1) #endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS +#define MAXTXLEN 32 + static INLINE int is_inter_mode(PREDICTION_MODE mode) { return mode >= NEARESTMV && mode <= NEWMV; } @@ -291,6 +293,14 @@ static const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = { ADST_ADST, // TM }; +#if CONFIG_SUPERTX +static INLINE int supertx_enabled(const MB_MODE_INFO *mbmi) { + return (int)mbmi->tx_size > + VPXMIN(b_width_log2_lookup[mbmi->sb_type], + b_height_log2_lookup[mbmi->sb_type]); +} +#endif // CONFIG_SUPERTX + #if CONFIG_EXT_TX #define ALLOW_INTRA_EXT_TX 1 @@ -469,8 +479,18 @@ static INLINE TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize, static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi, const struct macroblockd_plane *pd) { +#if CONFIG_SUPERTX + if (!supertx_enabled(mbmi)) { + return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type, pd->subsampling_x, + pd->subsampling_y); + } else { + return uvsupertx_size_lookup[mbmi->tx_size][pd->subsampling_x] + [pd->subsampling_y]; + } +#else return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type, pd->subsampling_x, 
pd->subsampling_y); +#endif // CONFIG_SUPERTX } static INLINE BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize, diff --git a/vp10/common/common_data.h b/vp10/common/common_data.h index 334489c9d..84476fa0a 100644 --- a/vp10/common/common_data.h +++ b/vp10/common/common_data.h @@ -170,6 +170,21 @@ static const struct { {0, 0 }, // 64X64 - {0b0000, 0b0000} }; +#if CONFIG_SUPERTX +static const TX_SIZE uvsupertx_size_lookup[TX_SIZES][2][2] = { + // ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1 + // ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1 + {{TX_4X4, TX_4X4}, {TX_4X4, TX_4X4}}, + {{TX_8X8, TX_4X4}, {TX_4X4, TX_4X4}}, + {{TX_16X16, TX_8X8}, {TX_8X8, TX_8X8}}, + {{TX_32X32, TX_16X16}, {TX_16X16, TX_16X16}}, +}; + +static const int partition_supertx_context_lookup[PARTITION_TYPES] = { + -1, 0, 0, 1 +}; +#endif // CONFIG_SUPERTX + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c index aa3e10b60..02287ec7a 100644 --- a/vp10/common/entropymode.c +++ b/vp10/common/entropymode.c @@ -1174,6 +1174,14 @@ default_intra_ext_tx_prob[EXT_TX_SETS_INTRA][EXT_TX_SIZES] }; #endif // CONFIG_EXT_TX +#if CONFIG_SUPERTX +static const vpx_prob default_supertx_prob[PARTITION_SUPERTX_CONTEXTS] + [TX_SIZES] = { + { 1, 160, 160, 170 }, + { 1, 200, 200, 210 }, +}; +#endif // CONFIG_SUPERTX + // FIXME(someone) need real defaults here static const struct segmentation_probs default_seg_probs = { { 128, 128, 128, 128, 128, 128, 128 }, @@ -1208,6 +1216,9 @@ static void init_mode_probs(FRAME_CONTEXT *fc) { vp10_copy(fc->inter_ext_tx_prob, default_inter_ext_tx_prob); vp10_copy(fc->intra_ext_tx_prob, default_intra_ext_tx_prob); #endif // CONFIG_EXT_TX +#if CONFIG_SUPERTX + vp10_copy(fc->supertx_prob, default_supertx_prob); +#endif // CONFIG_SUPERTX vp10_copy(fc->seg.tree_probs, default_seg_probs.tree_probs); vp10_copy(fc->seg.pred_probs, default_seg_probs.pred_probs); #if CONFIG_EXT_INTRA @@ -1346,6 +1357,16 @@ void 
vp10_adapt_intra_frame_probs(VP10_COMMON *cm) { } #endif // CONFIG_EXT_TX +#if CONFIG_SUPERTX + for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) { + int j; + for (j = 1; j < TX_SIZES; ++j) { + fc->supertx_prob[i][j] = mode_mv_merge_probs(pre_fc->supertx_prob[i][j], + counts->supertx[i][j]); + } + } +#endif // CONFIG_SUPERTX + if (cm->seg.temporal_update) { for (i = 0; i < PREDICTION_PROBS; i++) fc->seg.pred_probs[i] = mode_mv_merge_probs(pre_fc->seg.pred_probs[i], diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h index 1cc583846..11ba12f31 100644 --- a/vp10/common/entropymode.h +++ b/vp10/common/entropymode.h @@ -84,6 +84,9 @@ typedef struct frame_contexts { vpx_prob intra_ext_tx_prob[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES] [TX_TYPES - 1]; #endif // CONFIG_EXT_TX +#if CONFIG_SUPERTX + vpx_prob supertx_prob[PARTITION_SUPERTX_CONTEXTS][TX_SIZES]; +#endif // CONFIG_SUPERTX struct segmentation_probs seg; #if CONFIG_EXT_INTRA vpx_prob ext_intra_probs[PLANE_TYPES]; @@ -122,6 +125,10 @@ typedef struct FRAME_COUNTS { unsigned int intra_ext_tx[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES] [TX_TYPES]; #endif // CONFIG_EXT_TX +#if CONFIG_SUPERTX + unsigned int supertx[PARTITION_SUPERTX_CONTEXTS][TX_SIZES][2]; + unsigned int supertx_size[TX_SIZES]; +#endif // CONFIG_SUPERTX struct seg_counts seg; #if CONFIG_EXT_INTRA unsigned int ext_intra[PLANE_TYPES][2]; diff --git a/vp10/common/enums.h b/vp10/common/enums.h index 5dc4ceff8..c283222ea 100644 --- a/vp10/common/enums.h +++ b/vp10/common/enums.h @@ -246,6 +246,11 @@ typedef TX_SIZE TXFM_CONTEXT; #define COMP_REFS 2 #endif // CONFIG_EXT_REFS +#if CONFIG_SUPERTX +#define PARTITION_SUPERTX_CONTEXTS 2 +#define MAX_SUPERTX_BLOCK_SIZE BLOCK_32X32 +#endif // CONFIG_SUPERTX + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c index e3c50eadf..20d724daf 100644 --- a/vp10/common/loopfilter.c +++ b/vp10/common/loopfilter.c @@ -787,10 +787,18 @@ static void 
build_masks(const loop_filter_info_n *const lfi_n, // we only update u and v masks on the first block. static void build_y_mask(const loop_filter_info_n *const lfi_n, const MODE_INFO *mi, const int shift_y, +#if CONFIG_SUPERTX + int supertx_enabled, +#endif // CONFIG_SUPERTX LOOP_FILTER_MASK *lfm) { const MB_MODE_INFO *mbmi = &mi->mbmi; - const BLOCK_SIZE block_size = mbmi->sb_type; const TX_SIZE tx_size_y = mbmi->tx_size; +#if CONFIG_SUPERTX + const BLOCK_SIZE block_size = + supertx_enabled ? (BLOCK_SIZE)(3 * tx_size_y) : mbmi->sb_type; +#else + const BLOCK_SIZE block_size = mbmi->sb_type; +#endif const int filter_level = get_filter_level(lfi_n, mbmi); uint64_t *const left_y = &lfm->left_y[tx_size_y]; uint64_t *const above_y = &lfm->above_y[tx_size_y]; @@ -899,6 +907,10 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, break; case BLOCK_32X16: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); +#if CONFIG_SUPERTX + if (supertx_enabled(&mip[0]->mbmi)) + break; +#endif if (mi_32_row_offset + 2 >= max_rows) continue; mip2 = mip + mode_info_stride * 2; @@ -906,12 +918,22 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, break; case BLOCK_16X32: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); +#if CONFIG_SUPERTX + if (supertx_enabled(&mip[0]->mbmi)) + break; +#endif if (mi_32_col_offset + 2 >= max_cols) continue; mip2 = mip + 2; build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm); break; default: +#if CONFIG_SUPERTX + if (mip[0]->mbmi.tx_size == TX_32X32) { + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); + break; + } +#endif for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) { const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16]; const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16]; @@ -928,23 +950,45 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); break; case 
BLOCK_16X8: +#if CONFIG_SUPERTX + if (supertx_enabled(&mip[0]->mbmi)) + break; +#endif build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); if (mi_16_row_offset + 1 >= max_rows) continue; mip2 = mip + mode_info_stride; - build_y_mask(lfi_n, mip2[0], shift_y+8, lfm); + build_y_mask(lfi_n, mip2[0], shift_y+8, +#if CONFIG_SUPERTX + 0, +#endif + lfm); break; case BLOCK_8X16: +#if CONFIG_SUPERTX + if (supertx_enabled(&mip[0]->mbmi)) + break; +#endif build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); if (mi_16_col_offset +1 >= max_cols) continue; mip2 = mip + 1; - build_y_mask(lfi_n, mip2[0], shift_y+1, lfm); + build_y_mask(lfi_n, mip2[0], shift_y+1, +#if CONFIG_SUPERTX + 0, +#endif + lfm); break; default: { const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16] + shift_8_y[0]; +#if CONFIG_SUPERTX + if (mip[0]->mbmi.tx_size == TX_16X16) { + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); + break; + } +#endif build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); mip += offset[0]; for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) { @@ -959,7 +1003,11 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, if (mi_8_col_offset >= max_cols || mi_8_row_offset >= max_rows) continue; - build_y_mask(lfi_n, mip[0], shift_y, lfm); + build_y_mask(lfi_n, mip[0], shift_y, +#if CONFIG_SUPERTX + supertx_enabled(&mip[0]->mbmi), +#endif + lfm); } break; } diff --git a/vp10/common/reconinter.c b/vp10/common/reconinter.c index 88c7569f6..241b9aa7e 100644 --- a/vp10/common/reconinter.c +++ b/vp10/common/reconinter.c @@ -264,3 +264,227 @@ void vp10_setup_pre_planes(MACROBLOCKD *xd, int idx, } } } + +#if CONFIG_SUPERTX +static const uint8_t mask_8[8] = { + 64, 64, 62, 52, 12, 2, 0, 0 +}; + +static const uint8_t mask_16[16] = { + 63, 62, 60, 58, 55, 50, 43, 36, 28, 21, 14, 9, 6, 4, 2, 1 +}; + +static const uint8_t mask_32[32] = { + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 61, 57, 52, 45, 36, + 28, 19, 12, 7, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + 
+static const uint8_t mask_8_uv[8] = { + 64, 64, 62, 52, 12, 2, 0, 0 +}; + +static const uint8_t mask_16_uv[16] = { + 64, 64, 64, 64, 61, 53, 45, 36, 28, 19, 11, 3, 0, 0, 0, 0 +}; + +static const uint8_t mask_32_uv[32] = { + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 60, 54, 46, 36, + 28, 18, 10, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +static void generate_1dmask(int length, uint8_t *mask, int plane) { + switch (length) { + case 8: + memcpy(mask, plane ? mask_8_uv : mask_8, length); + break; + case 16: + memcpy(mask, plane ? mask_16_uv : mask_16, length); + break; + case 32: + memcpy(mask, plane ? mask_32_uv : mask_32, length); + break; + default: + assert(0); + } +} + + +void vp10_build_masked_inter_predictor_complex( + MACROBLOCKD *xd, + uint8_t *dst, int dst_stride, uint8_t *dst2, int dst2_stride, + const struct macroblockd_plane *pd, int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, + PARTITION_TYPE partition, int plane) { + int i, j; + uint8_t mask[MAXTXLEN]; + int top_w = 4 << b_width_log2_lookup[top_bsize], + top_h = 4 << b_height_log2_lookup[top_bsize]; + int w = 4 << b_width_log2_lookup[bsize], h = 4 << b_height_log2_lookup[bsize]; + int w_offset = (mi_col - mi_col_ori) << 3, + h_offset = (mi_row - mi_row_ori) << 3; + +#if CONFIG_VP9_HIGHBITDEPTH + uint16_t *dst16= CONVERT_TO_SHORTPTR(dst); + uint16_t *dst216 = CONVERT_TO_SHORTPTR(dst2); + int b_hdb = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
1 : 0; +#endif // CONFIG_VP9_HIGHBITDEPTH + + top_w >>= pd->subsampling_x; + top_h >>= pd->subsampling_y; + w >>= pd->subsampling_x; + h >>= pd->subsampling_y; + w_offset >>= pd->subsampling_x; + h_offset >>= pd->subsampling_y; + + switch (partition) { + case PARTITION_HORZ: + { +#if CONFIG_VP9_HIGHBITDEPTH + if (b_hdb) { + uint16_t *dst_tmp = dst16 + h_offset * dst_stride; + uint16_t *dst2_tmp = dst216 + h_offset * dst2_stride; + generate_1dmask(h, mask + h_offset, + plane && xd->plane[plane].subsampling_y); + + for (i = h_offset; i < h_offset + h; i++) { + for (j = 0; j < top_w; j++) { + const int m = mask[i]; assert(m >= 0 && m <= 64); + if (m == 64) + continue; + + if (m == 0) + dst_tmp[j] = dst2_tmp[j]; + else + dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6; + } + dst_tmp += dst_stride; + dst2_tmp += dst2_stride; + } + + for (; i < top_h; i ++) { + memcpy(dst_tmp, dst2_tmp, top_w * sizeof(uint16_t)); + dst_tmp += dst_stride; + dst2_tmp += dst2_stride; + } + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + uint8_t *dst_tmp = dst + h_offset * dst_stride; + uint8_t *dst2_tmp = dst2 + h_offset * dst2_stride; + generate_1dmask(h, mask + h_offset, + plane && xd->plane[plane].subsampling_y); + + for (i = h_offset; i < h_offset + h; i++) { + for (j = 0; j < top_w; j++) { + const int m = mask[i]; assert(m >= 0 && m <= 64); + if (m == 64) + continue; + + if (m == 0) + dst_tmp[j] = dst2_tmp[j]; + else + dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6; + } + dst_tmp += dst_stride; + dst2_tmp += dst2_stride; + } + + for (; i < top_h; i ++) { + memcpy(dst_tmp, dst2_tmp, top_w * sizeof(uint8_t)); + dst_tmp += dst_stride; + dst2_tmp += dst2_stride; + } +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + } + + break; + case PARTITION_VERT: + { +#if CONFIG_VP9_HIGHBITDEPTH + if (b_hdb) { + uint16_t *dst_tmp = dst16; + uint16_t *dst2_tmp = dst216; + generate_1dmask(w, mask + w_offset, + plane && xd->plane[plane].subsampling_x); 
+ + for (i = 0; i < top_h; i++) { + for (j = w_offset; j < w_offset + w; j++) { + const int m = mask[j]; assert(m >= 0 && m <= 64); + if (m == 64) + continue; + + if (m == 0) + dst_tmp[j] = dst2_tmp[j]; + else + dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6; + } + memcpy(dst_tmp + j, dst2_tmp + j, + (top_w - w_offset - w) * sizeof(uint16_t)); + dst_tmp += dst_stride; + dst2_tmp += dst2_stride; + } + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + uint8_t *dst_tmp = dst; + uint8_t *dst2_tmp = dst2; + generate_1dmask(w, mask + w_offset, + plane && xd->plane[plane].subsampling_x); + + for (i = 0; i < top_h; i++) { + for (j = w_offset; j < w_offset + w; j++) { + const int m = mask[j]; assert(m >= 0 && m <= 64); + if (m == 64) + continue; + + if (m == 0) + dst_tmp[j] = dst2_tmp[j]; + else + dst_tmp[j] = (dst_tmp[j] * m + dst2_tmp[j] * (64 - m) + 32) >> 6; + } + memcpy(dst_tmp + j, dst2_tmp + j, + (top_w - w_offset - w) * sizeof(uint8_t)); + dst_tmp += dst_stride; + dst2_tmp += dst2_stride; + } +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + } + break; + default: + assert(0); + } + (void) xd; +} + +void vp10_build_inter_predictors_sb_sub8x8(MACROBLOCKD *xd, + int mi_row, int mi_col, + BLOCK_SIZE bsize, int block) { + // Prediction function used in supertx: + // Use the mv at current block (which is less than 8x8) + // to get prediction of a block located at (mi_row, mi_col) at size of bsize + // bsize can be larger than 8x8. + // block (0-3): the sub8x8 location of current block + int plane; + const int mi_x = mi_col * MI_SIZE; + const int mi_y = mi_row * MI_SIZE; + + // For sub8x8 uv: + // Skip uv prediction in supertx except the first block (block = 0) + int max_plane = block ? 
1 : MAX_MB_PLANE; + + for (plane = 0; plane < max_plane; plane++) { + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, + &xd->plane[plane]); + const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const int bw = 4 * num_4x4_w; + const int bh = 4 * num_4x4_h; + + build_inter_predictors(xd, plane, block, bw, bh, + 0, 0, bw, bh, + mi_x, mi_y); + } +} +#endif // CONFIG_SUPERTX diff --git a/vp10/common/reconinter.h b/vp10/common/reconinter.h index bcfa3f608..bc2df9e23 100644 --- a/vp10/common/reconinter.h +++ b/vp10/common/reconinter.h @@ -153,25 +153,39 @@ static INLINE MV average_split_mvs(const struct macroblockd_plane *pd, } void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, - int bw, int bh, - int x, int y, int w, int h, - int mi_x, int mi_y); + int bw, int bh, + int x, int y, int w, int h, + int mi_x, int mi_y); void vp10_build_inter_predictor_sub8x8(MACROBLOCKD *xd, int plane, int i, int ir, int ic, int mi_row, int mi_col); void vp10_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize); + BLOCK_SIZE bsize); void vp10_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize, int plane); + BLOCK_SIZE bsize, int plane); void vp10_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize); + BLOCK_SIZE bsize); void vp10_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, - BLOCK_SIZE bsize); + BLOCK_SIZE bsize); + +#if CONFIG_SUPERTX +void vp10_build_inter_predictors_sb_sub8x8(MACROBLOCKD *xd, + int mi_row, int mi_col, + BLOCK_SIZE bsize, int block); +struct macroblockd_plane; +void vp10_build_masked_inter_predictor_complex( + MACROBLOCKD *xd, + uint8_t *dst, int dst_stride, uint8_t *dst2, int dst2_stride, + const struct macroblockd_plane *pd, int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, + PARTITION_TYPE 
partition, int plane); + +#endif // CONFIG_SUPERTX void vp10_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, diff --git a/vp10/common/thread_common.c b/vp10/common/thread_common.c index d2fbccd90..3098b361a 100644 --- a/vp10/common/thread_common.c +++ b/vp10/common/thread_common.c @@ -477,6 +477,15 @@ void vp10_accumulate_frame_counts(VP10_COMMON *cm, FRAME_COUNTS *counts, } #endif // CONFIG_EXT_TX +#if CONFIG_SUPERTX + for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; i++) + for (j = 0; j < TX_SIZES; j++) + for (k = 0; k < 2; k++) + cm->counts.supertx[i][j][k] += counts->supertx[i][j][k]; + for (i = 0; i < TX_SIZES; i++) + cm->counts.supertx_size[i] += counts->supertx_size[i]; +#endif // CONFIG_SUPERTX + for (i = 0; i < PREDICTION_PROBS; i++) for (j = 0; j < 2; j++) cm->counts.seg.pred[i][j] += counts->seg.pred[i][j]; diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c index f505e64b1..207ceda26 100644 --- a/vp10/decoder/decodeframe.c +++ b/vp10/decoder/decodeframe.c @@ -435,7 +435,7 @@ static void decode_reconstruct_tx(MACROBLOCKD *const xd, vpx_reader *r, } } } -#endif +#endif // CONFIG_VAR_TX static int reconstruct_inter_block(MACROBLOCKD *const xd, vpx_reader *r, MB_MODE_INFO *const mbmi, int plane, @@ -576,7 +576,9 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, subpel_y, sf, w, h, ref, kernel, xs, ys); } } + #else + static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, int x0, int y0, int b_w, int b_h, int frame_width, int frame_height, @@ -823,6 +825,49 @@ static void dec_build_inter_predictors_sb(VP10Decoder *const pbi, } } +static void dec_build_inter_predictors_sb_sub8x8(VP10Decoder *const pbi, + MACROBLOCKD *xd, + int mi_row, int mi_col, + int block) { + // Prediction function used in supertx: + // Use the mv at current block (which is less than 8x8) + int plane; + const int mi_x = mi_col * MI_SIZE; + const int mi_y = mi_row * MI_SIZE; + const 
MODE_INFO *mi = xd->mi[0]; + const InterpKernel *kernel = vp10_filter_kernels[mi->mbmi.interp_filter]; + const int is_compound = has_second_ref(&mi->mbmi); + + // For sub8x8 uv: + // Skip uv prediction in supertx except the first block (block = 0) + int max_plane = block ? 1 : MAX_MB_PLANE; + + for (plane = 0; plane < max_plane; ++plane) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + struct buf_2d *const dst_buf = &pd->dst; + const int num_4x4_w = pd->n4_w; + const int num_4x4_h = pd->n4_h; + + const int n4w_x4 = 4 * num_4x4_w; + const int n4h_x4 = 4 * num_4x4_h; + int ref; + + for (ref = 0; ref < 1 + is_compound; ++ref) { + const struct scale_factors *const sf = &xd->block_refs[ref]->sf; + struct buf_2d *const pre_buf = &pd->pre[ref]; + const int idx = xd->block_refs[ref]->idx; + BufferPool *const pool = pbi->common.buffer_pool; + RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx]; + const int is_scaled = vp10_is_scaled(sf); + const MV mv = average_split_mvs(pd, mi, ref, block); + dec_build_inter_predictors(pbi, xd, plane, n4w_x4, n4h_x4, + 0, 0, n4w_x4, n4h_x4, mi_x, mi_y, kernel, + sf, pre_buf, dst_buf, &mv, ref_frame_buf, + is_scaled, ref); + } + } +} + static INLINE TX_SIZE dec_get_uv_tx_size(const MB_MODE_INFO *mbmi, int n4_wl, int n4_hl) { // get minimum log2 num4x4s dimension @@ -885,7 +930,588 @@ static MB_MODE_INFO *set_offsets(VP10_COMMON *const cm, MACROBLOCKD *const xd, return &xd->mi[0]->mbmi; } +#if CONFIG_SUPERTX +static MB_MODE_INFO *set_offsets_extend(VP10_COMMON *const cm, + MACROBLOCKD *const xd, + const TileInfo *const tile, + BLOCK_SIZE bsize_pred, + int mi_row_pred, int mi_col_pred, + int mi_row_ori, int mi_col_ori) { + // Used in supertx + // (mi_row_ori, mi_col_ori): location for mv + // (mi_row_pred, mi_col_pred, bsize_pred): region to predict + const int bw = num_8x8_blocks_wide_lookup[bsize_pred]; + const int bh = num_8x8_blocks_high_lookup[bsize_pred]; + const int offset = mi_row_ori * cm->mi_stride + mi_col_ori; 
+ const int bwl = b_width_log2_lookup[bsize_pred]; + const int bhl = b_height_log2_lookup[bsize_pred]; + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = cm->mi + offset; + set_mi_row_col(xd, tile, mi_row_pred, bh, mi_col_pred, bw, + cm->mi_rows, cm->mi_cols); + + xd->up_available = (mi_row_ori != 0); + xd->left_available = (mi_col_ori > tile->mi_col_start); + + set_plane_n4(xd, bw, bh, bwl, bhl); + + return &xd->mi[0]->mbmi; +} + +static MB_MODE_INFO *set_mb_offsets(VP10_COMMON *const cm, + MACROBLOCKD *const xd, + BLOCK_SIZE bsize, + int mi_row, int mi_col, + int bw, int bh, + int x_mis, int y_mis) { + const int offset = mi_row * cm->mi_stride + mi_col; + const TileInfo *const tile = &xd->tile; + int x, y; + + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = cm->mi + offset; + xd->mi[0]->mbmi.sb_type = bsize; + for (y = 0; y < y_mis; ++y) + for (x = !y; x < x_mis; ++x) + xd->mi[y * cm->mi_stride + x] = xd->mi[0]; + + set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); + return &xd->mi[0]->mbmi; +} + +static void set_offsets_topblock(VP10_COMMON *const cm, MACROBLOCKD *const xd, + const TileInfo *const tile, + BLOCK_SIZE bsize, int mi_row, int mi_col) { + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; + const int offset = mi_row * cm->mi_stride + mi_col; + const int bwl = b_width_log2_lookup[bsize]; + const int bhl = b_height_log2_lookup[bsize]; + + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = cm->mi + offset; + + set_plane_n4(xd, bw, bh, bwl, bhl); + + set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); + + vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); +} + +static void set_param_topblock(VP10_COMMON *const cm, MACROBLOCKD *const xd, + BLOCK_SIZE bsize, int mi_row, int mi_col, +#if CONFIG_EXT_TX + int txfm, +#endif + int skip) { + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = 
num_8x8_blocks_high_lookup[bsize]; + const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); + const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); + const int offset = mi_row * cm->mi_stride + mi_col; + int x, y; + + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = cm->mi + offset; + + for (y = 0; y < y_mis; ++y) + for (x = 0; x < x_mis; ++x) { + xd->mi[y * cm->mi_stride + x]->mbmi.skip = skip; +#if CONFIG_EXT_TX + xd->mi[y * cm->mi_stride + x]->mbmi.tx_type = txfm; +#endif + } +} + +static void set_ref(VP10_COMMON *const cm, MACROBLOCKD *const xd, + int idx, int mi_row, int mi_col) { + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + RefBuffer *ref_buffer = &cm->frame_refs[mbmi->ref_frame[idx] - LAST_FRAME]; + xd->block_refs[idx] = ref_buffer; + if (!vp10_is_valid_scale(&ref_buffer->sf)) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Invalid scale factors"); + vp10_setup_pre_planes(xd, idx, ref_buffer->buf, mi_row, mi_col, + &ref_buffer->sf); + xd->corrupted |= ref_buffer->buf->corrupted; +} + +static void dec_predict_b_extend( + VP10Decoder *const pbi, MACROBLOCKD *const xd, + const TileInfo *const tile, int block, + int mi_row_ori, int mi_col_ori, + int mi_row_pred, int mi_col_pred, + int mi_row_top, int mi_col_top, + uint8_t * dst_buf[3], int dst_stride[3], + BLOCK_SIZE bsize_top, + BLOCK_SIZE bsize_pred, + int b_sub8x8, int bextend) { + // Used in supertx + // (mi_row_ori, mi_col_ori): location for mv + // (mi_row_pred, mi_col_pred, bsize_pred): region to predict + // (mi_row_top, mi_col_top, bsize_top): region of the top partition size + // block: sub location of sub8x8 blocks + // b_sub8x8: 1: ori is sub8x8; 0: ori is not sub8x8 + // bextend: 1: region to predict is an extension of ori; 0: not + int r = (mi_row_pred - mi_row_top) * MI_SIZE; + int c = (mi_col_pred - mi_col_top) * MI_SIZE; + const int mi_width_top = num_8x8_blocks_wide_lookup[bsize_top]; + const int mi_height_top = num_8x8_blocks_high_lookup[bsize_top]; + MB_MODE_INFO *mbmi; + 
VP10_COMMON *const cm = &pbi->common; + + if (mi_row_pred < mi_row_top || mi_col_pred < mi_col_top || + mi_row_pred >= mi_row_top + mi_height_top || + mi_col_pred >= mi_col_top + mi_width_top || + mi_row_pred >= cm->mi_rows || mi_col_pred >= cm->mi_cols) + return; + + mbmi = set_offsets_extend(cm, xd, tile, bsize_pred, + mi_row_pred, mi_col_pred, + mi_row_ori, mi_col_ori); + set_ref(cm, xd, 0, mi_row_pred, mi_col_pred); + if (has_second_ref(&xd->mi[0]->mbmi)) + set_ref(cm, xd, 1, mi_row_pred, mi_col_pred); + + if (!bextend) { + mbmi->tx_size = b_width_log2_lookup[bsize_top]; + } + + xd->plane[0].dst.stride = dst_stride[0]; + xd->plane[1].dst.stride = dst_stride[1]; + xd->plane[2].dst.stride = dst_stride[2]; + xd->plane[0].dst.buf = dst_buf[0] + + (r >> xd->plane[0].subsampling_y) * dst_stride[0] + + (c >> xd->plane[0].subsampling_x); + xd->plane[1].dst.buf = dst_buf[1] + + (r >> xd->plane[1].subsampling_y) * dst_stride[1] + + (c >> xd->plane[1].subsampling_x); + xd->plane[2].dst.buf = dst_buf[2] + + (r >> xd->plane[2].subsampling_y) * dst_stride[2] + + (c >> xd->plane[2].subsampling_x); + + if (!b_sub8x8) + dec_build_inter_predictors_sb(pbi, xd, mi_row_pred, mi_col_pred); + else + dec_build_inter_predictors_sb_sub8x8(pbi, xd, mi_row_pred, mi_col_pred, + block); +} + +static void dec_extend_dir(VP10Decoder *const pbi, MACROBLOCKD *const xd, + const TileInfo *const tile, int block, + BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, + int mi_row, int mi_col, + int mi_row_top, int mi_col_top, + uint8_t * dst_buf[3], int dst_stride[3], int dir) { + // dir: 0-lower, 1-upper, 2-left, 3-right + // 4-lowerleft, 5-upperleft, 6-lowerright, 7-upperright + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + int xss = xd->plane[1].subsampling_x; + int yss = xd->plane[1].subsampling_y; + int b_sub8x8 = (bsize < BLOCK_8X8) ? 
1 : 0; + BLOCK_SIZE extend_bsize; + int unit, mi_row_pred, mi_col_pred; + + if (dir == 0 || dir == 1) { + extend_bsize = (mi_width == 1 || bsize < BLOCK_8X8 || xss < yss) ? + BLOCK_8X8 : BLOCK_16X8; + unit = num_8x8_blocks_wide_lookup[extend_bsize]; + mi_row_pred = mi_row + ((dir == 0) ? mi_height : -1); + mi_col_pred = mi_col; + + dec_predict_b_extend(pbi, xd, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, + mi_row_top, mi_col_top, + dst_buf, dst_stride, + top_bsize, extend_bsize, b_sub8x8, 1); + + if (mi_width > unit) { + int i; + assert(!b_sub8x8); + for (i = 0; i < mi_width/unit - 1; i++) { + mi_col_pred += unit; + dec_predict_b_extend(pbi, xd, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, + mi_row_top, mi_col_top, + dst_buf, dst_stride, + top_bsize, extend_bsize, b_sub8x8, 1); + } + } + } else if (dir == 2 || dir == 3) { + extend_bsize = (mi_height == 1 || bsize < BLOCK_8X8 || yss < xss) ? + BLOCK_8X8 : BLOCK_8X16; + unit = num_8x8_blocks_high_lookup[extend_bsize]; + mi_row_pred = mi_row; + mi_col_pred = mi_col + ((dir == 3) ? mi_width : -1); + + dec_predict_b_extend(pbi, xd, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, + mi_row_top, mi_col_top, + dst_buf, dst_stride, + top_bsize, extend_bsize, b_sub8x8, 1); + + if (mi_height > unit) { + int i; + for (i = 0; i < mi_height/unit - 1; i++) { + mi_row_pred += unit; + dec_predict_b_extend(pbi, xd, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, + mi_row_top, mi_col_top, + dst_buf, dst_stride, + top_bsize, extend_bsize, b_sub8x8, 1); + } + } + } else { + extend_bsize = BLOCK_8X8; + mi_row_pred = mi_row + ((dir == 4 || dir == 6) ? mi_height : -1); + mi_col_pred = mi_col + ((dir == 6 || dir == 7) ? 
mi_width : -1); + dec_predict_b_extend(pbi, xd, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, + mi_row_top, mi_col_top, + dst_buf, dst_stride, + top_bsize, extend_bsize, b_sub8x8, 1); + } +} + +static void dec_extend_all(VP10Decoder *const pbi, MACROBLOCKD *const xd, + const TileInfo *const tile, int block, + BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, + int mi_row, int mi_col, + int mi_row_top, int mi_col_top, + uint8_t * dst_buf[3], int dst_stride[3]) { + dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, 0); + dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, 1); + dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, 2); + dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, 3); + dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, 4); + dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, 5); + dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, 6); + dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, 7); +} + +static void dec_predict_sb_complex(VP10Decoder *const pbi, + MACROBLOCKD *const xd, + const TileInfo *const tile, + int mi_row, int mi_col, + int mi_row_top, int mi_col_top, + BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, + uint8_t *dst_buf[3], int dst_stride[3]) { + VP10_COMMON *const cm = &pbi->common; + const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + MB_MODE_INFO *mbmi; + int i, offset = mi_row * cm->mi_stride + mi_col; + uint8_t *dst_buf1[3], *dst_buf2[3], 
*dst_buf3[3]; + + DECLARE_ALIGNED(16, uint8_t, + tmp_buf1[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); + DECLARE_ALIGNED(16, uint8_t, + tmp_buf2[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); + DECLARE_ALIGNED(16, uint8_t, + tmp_buf3[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); + int dst_stride1[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; + int dst_stride2[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; + int dst_stride3[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + int len = sizeof(uint16_t); + dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); + dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAXTXLEN * MAXTXLEN * len); + dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN * len); + dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); + dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAXTXLEN * MAXTXLEN * len); + dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN * len); + dst_buf3[0] = CONVERT_TO_BYTEPTR(tmp_buf3); + dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAXTXLEN * MAXTXLEN * len); + dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN * len); + } else { +#endif + dst_buf1[0] = tmp_buf1; + dst_buf1[1] = tmp_buf1 + MAXTXLEN * MAXTXLEN; + dst_buf1[2] = tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN; + dst_buf2[0] = tmp_buf2; + dst_buf2[1] = tmp_buf2 + MAXTXLEN * MAXTXLEN; + dst_buf2[2] = tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN; + dst_buf3[0] = tmp_buf3; + dst_buf3[1] = tmp_buf3 + MAXTXLEN * MAXTXLEN; + dst_buf3[2] = tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN; +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = cm->mi + offset; + mbmi = &xd->mi[0]->mbmi; + partition = partition_lookup[bsl][mbmi->sb_type]; + subsize = get_subsize(bsize, partition); + + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + } + + switch (partition) { + case 
PARTITION_NONE: + assert(bsize < top_bsize); + dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + top_bsize, bsize, 0, 0); + dec_extend_all(pbi, xd, tile, 0, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride); + break; + case PARTITION_HORZ: + if (bsize == BLOCK_8X8) { + // For sub8x8, predict in 8x8 unit + // First half + dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + top_bsize, BLOCK_8X8, 1, 0); + if (bsize < top_bsize) + dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride); + + // Second half + dec_predict_b_extend(pbi, xd, tile, 2, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf1, dst_stride1, + top_bsize, BLOCK_8X8, 1, 1); + if (bsize < top_bsize) + dec_extend_all(pbi, xd, tile, 2, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf1, dst_stride1); + + // weighted average to smooth the boundary + xd->plane[0].dst.buf = dst_buf[0]; + xd->plane[0].dst.stride = dst_stride[0]; + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[0], dst_stride[0], + dst_buf1[0], dst_stride1[0], + &xd->plane[0], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_HORZ, 0); + } else { + // First half + dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + top_bsize, subsize, 0, 0); + if (bsize < top_bsize) + dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride); + else + dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, 0); + + if (mi_row + hbs < cm->mi_rows) { + // Second half + dec_predict_b_extend(pbi, xd, tile, 0, mi_row + hbs, mi_col, + mi_row + hbs, mi_col, + mi_row_top, mi_col_top, + 
dst_buf1, dst_stride1, + top_bsize, subsize, 0, 0); + if (bsize < top_bsize) + dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, + mi_row + hbs, mi_col, + mi_row_top, mi_col_top, + dst_buf1, dst_stride1); + else + dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize, + mi_row + hbs, mi_col, + mi_row_top, mi_col_top, + dst_buf1, dst_stride1, 1); + + // weighted average to smooth the boundary + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + vp10_build_masked_inter_predictor_complex( + xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], + &xd->plane[i], mi_row, mi_col, mi_row_top, mi_col_top, + bsize, top_bsize, PARTITION_HORZ, i); + } + } + } + break; + case PARTITION_VERT: + if (bsize == BLOCK_8X8) { + // First half + dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + top_bsize, BLOCK_8X8, 1, 0); + if (bsize < top_bsize) + dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride); + + // Second half + dec_predict_b_extend(pbi, xd, tile, 1, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf1, dst_stride1, + top_bsize, BLOCK_8X8, 1, 1); + if (bsize < top_bsize) + dec_extend_all(pbi, xd, tile, 1, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf1, dst_stride1); + + // Smooth + xd->plane[0].dst.buf = dst_buf[0]; + xd->plane[0].dst.stride = dst_stride[0]; + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[0], dst_stride[0], + dst_buf1[0], dst_stride1[0], + &xd->plane[0], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_VERT, 0); + } else { + // First half + dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + top_bsize, subsize, 0, 0); + if (bsize < top_bsize) + dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col, + 
mi_row_top, mi_col_top, dst_buf, dst_stride); + else + dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, 3); + + // Second half + if (mi_col + hbs < cm->mi_cols) { + dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col + hbs, + mi_row, mi_col + hbs, mi_row_top, mi_col_top, + dst_buf1, dst_stride1, top_bsize, subsize, 0, 0); + if (bsize < top_bsize) + dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, + mi_row, mi_col + hbs, mi_row_top, mi_col_top, + dst_buf1, dst_stride1); + else + dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize, + mi_row, mi_col + hbs, mi_row_top, mi_col_top, + dst_buf1, dst_stride1, 2); + + // Smooth + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + vp10_build_masked_inter_predictor_complex( + xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], + &xd->plane[i], mi_row, mi_col, mi_row_top, mi_col_top, + bsize, top_bsize, PARTITION_VERT, i); + } + } + } + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) { + dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + top_bsize, BLOCK_8X8, 1, 0); + dec_predict_b_extend(pbi, xd, tile, 1, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf1, dst_stride1, + top_bsize, BLOCK_8X8, 1, 1); + dec_predict_b_extend(pbi, xd, tile, 2, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf2, dst_stride2, + top_bsize, BLOCK_8X8, 1, 1); + dec_predict_b_extend(pbi, xd, tile, 3, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf3, dst_stride3, + top_bsize, BLOCK_8X8, 1, 1); + if (bsize < top_bsize) { + dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride); + dec_extend_all(pbi, xd, tile, 1, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf1, dst_stride1); + dec_extend_all(pbi, xd, 
tile, 2, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf2, dst_stride2); + dec_extend_all(pbi, xd, tile, 3, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf3, dst_stride3); + } + } else { + dec_predict_sb_complex(pbi, xd, tile, mi_row, mi_col, + mi_row_top, mi_col_top, subsize, top_bsize, + dst_buf, dst_stride); + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) + dec_predict_sb_complex(pbi, xd, tile, mi_row, mi_col + hbs, + mi_row_top, mi_col_top, subsize, top_bsize, + dst_buf1, dst_stride1); + if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) + dec_predict_sb_complex(pbi, xd, tile, mi_row + hbs, mi_col, + mi_row_top, mi_col_top, subsize, top_bsize, + dst_buf2, dst_stride2); + if (mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) + dec_predict_sb_complex(pbi, xd, tile, mi_row + hbs, mi_col + hbs, + mi_row_top, mi_col_top, subsize, top_bsize, + dst_buf3, dst_stride3); + } + for (i = 0; i < MAX_MB_PLANE; i++) { + if (bsize == BLOCK_8X8 && i != 0) + continue; // Skip <4x4 chroma smoothing + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) { + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[i], dst_stride[i], + dst_buf1[i], + dst_stride1[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_VERT, i); + if (mi_row + hbs < cm->mi_rows) { + vp10_build_masked_inter_predictor_complex(xd, + dst_buf2[i], + dst_stride2[i], + dst_buf3[i], + dst_stride3[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_VERT, i); + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[i], + dst_stride[i], + dst_buf2[i], + dst_stride2[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_HORZ, i); + } + } else if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) { + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[i], + dst_stride[i], + dst_buf2[i], + dst_stride2[i], + 
&xd->plane[i], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_HORZ, i); + } + } + break; + default: + assert(0); + } +} +#endif // CONFIG_SUPERTX + static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd, +#if CONFIG_SUPERTX + int supertx_enabled, +#endif // CONFIG_SUPERTX int mi_row, int mi_col, vpx_reader *r, BLOCK_SIZE bsize, int bwl, int bhl) { @@ -896,8 +1522,22 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd, const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); +#if CONFIG_SUPERTX + MB_MODE_INFO *mbmi; + if (supertx_enabled) { + mbmi = set_mb_offsets(cm, xd, bsize, mi_row, mi_col, + bw, bh, x_mis, y_mis); + } else { + mbmi = set_offsets(cm, xd, bsize, mi_row, mi_col, + bw, bh, x_mis, y_mis, bwl, bhl); + } + vp10_read_mode_info(pbi, xd, supertx_enabled, + mi_row, mi_col, r, x_mis, y_mis); +#else MB_MODE_INFO *mbmi = set_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis, y_mis, bwl, bhl); + vp10_read_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis); +#endif // CONFIG_SUPERTX if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) { const BLOCK_SIZE uv_subsize = @@ -907,88 +1547,92 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd, VPX_CODEC_CORRUPT_FRAME, "Invalid block size."); } - vp10_read_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis); - - if (mbmi->skip) { - dec_reset_skip_context(xd); - } - - if (!is_inter_block(mbmi)) { - int plane; - for (plane = 0; plane < MAX_MB_PLANE; ++plane) { - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const TX_SIZE tx_size = - plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl) - : mbmi->tx_size; - const int num_4x4_w = pd->n4_w; - const int num_4x4_h = pd->n4_h; - const int step = (1 << tx_size); - int row, col; - const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 
- 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); - const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? - 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); - - if (plane <= 1 && mbmi->palette_mode_info.palette_size[plane]) - vp10_decode_palette_tokens(xd, plane, r); - - for (row = 0; row < max_blocks_high; row += step) - for (col = 0; col < max_blocks_wide; col += step) - predict_and_reconstruct_intra_block(xd, r, mbmi, plane, - row, col, tx_size); +#if CONFIG_SUPERTX + if (!supertx_enabled) { +#endif + if (mbmi->skip) { + dec_reset_skip_context(xd); } - } else { - // Prediction - dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col); - - // Reconstruction - if (!mbmi->skip) { - int eobtotal = 0; + if (!is_inter_block(mbmi)) { int plane; - for (plane = 0; plane < MAX_MB_PLANE; ++plane) { const struct macroblockd_plane *const pd = &xd->plane[plane]; - const int num_4x4_w = pd->n4_w; - const int num_4x4_h = pd->n4_h; - int row, col; -#if CONFIG_VAR_TX - // TODO(jingning): This can be simplified for decoder performance. - const BLOCK_SIZE plane_bsize = - get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), pd); - const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize]; - const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size]; - int bw = num_4x4_blocks_wide_lookup[txb_size]; - int block = 0; - const int step = 1 << (max_tx_size << 1); - - for (row = 0; row < num_4x4_h; row += bw) { - for (col = 0; col < num_4x4_w; col += bw) { - decode_reconstruct_tx(xd, r, mbmi, plane, plane_bsize, - block, row, col, max_tx_size, &eobtotal); - block += step; - } - } -#else const TX_SIZE tx_size = plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl) - : mbmi->tx_size; + : mbmi->tx_size; + const int num_4x4_w = pd->n4_w; + const int num_4x4_h = pd->n4_h; const int step = (1 << tx_size); - const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 
- 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); - const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? - 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + int row, col; + const int max_blocks_wide = num_4x4_w + + (xd->mb_to_right_edge >= 0 ? + 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); + const int max_blocks_high = num_4x4_h + + (xd->mb_to_bottom_edge >= 0 ? + 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); for (row = 0; row < max_blocks_high; row += step) for (col = 0; col < max_blocks_wide; col += step) - eobtotal += reconstruct_inter_block(xd, r, mbmi, plane, row, col, - tx_size); -#endif + predict_and_reconstruct_intra_block(xd, r, mbmi, plane, + row, col, tx_size); } + } else { + // Prediction + dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col); + + // Reconstruction + if (!mbmi->skip) { + int eobtotal = 0; + int plane; + + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const int num_4x4_w = pd->n4_w; + const int num_4x4_h = pd->n4_h; + int row, col; +#if CONFIG_VAR_TX + // TODO(jingning): This can be simplified for decoder performance. + const BLOCK_SIZE plane_bsize = + get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), pd); + const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize]; + const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size]; + int bw = num_4x4_blocks_wide_lookup[txb_size]; + int block = 0; + const int step = 1 << (max_tx_size << 1); + + for (row = 0; row < num_4x4_h; row += bw) { + for (col = 0; col < num_4x4_w; col += bw) { + decode_reconstruct_tx(xd, r, mbmi, plane, plane_bsize, + block, row, col, max_tx_size, &eobtotal); + block += step; + } + } +#else + const TX_SIZE tx_size = + plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl) + : mbmi->tx_size; + const int step = (1 << tx_size); + const int max_blocks_wide = num_4x4_w + + (xd->mb_to_right_edge >= 0 ? 
+ 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); + const int max_blocks_high = num_4x4_h + + (xd->mb_to_bottom_edge >= 0 ? + 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + + for (row = 0; row < max_blocks_high; row += step) + for (col = 0; col < max_blocks_wide; col += step) + eobtotal += reconstruct_inter_block(xd, r, mbmi, plane, row, col, + tx_size); +#endif + } - if (!less8x8 && eobtotal == 0) - mbmi->has_no_coeffs = 1; // skip loopfilter + if (!less8x8 && eobtotal == 0) + mbmi->has_no_coeffs = 1; // skip loopfilter + } } +#if CONFIG_SUPERTX } +#endif // CONFIG_SUPERTX xd->corrupted |= vpx_reader_has_error(r); } @@ -1042,8 +1686,23 @@ static PARTITION_TYPE read_partition(VP10_COMMON *cm, MACROBLOCKD *xd, return p; } +#if CONFIG_SUPERTX +static int read_skip_without_seg(VP10_COMMON *cm, const MACROBLOCKD *xd, + vpx_reader *r) { + const int ctx = vp10_get_skip_context(xd); + const int skip = vpx_read(r, cm->fc->skip_probs[ctx]); + FRAME_COUNTS *counts = xd->counts; + if (counts) + ++counts->skip[ctx][skip]; + return skip; +} +#endif // CONFIG_SUPERTX + // TODO(slavarnway): eliminate bsize and subsize in future commits static void decode_partition(VP10Decoder *const pbi, MACROBLOCKD *const xd, +#if CONFIG_SUPERTX + int supertx_enabled, +#endif int mi_row, int mi_col, vpx_reader* r, BLOCK_SIZE bsize, int n4x4_l2) { VP10_COMMON *const cm = &pbi->common; @@ -1054,6 +1713,15 @@ static void decode_partition(VP10Decoder *const pbi, MACROBLOCKD *const xd, BLOCK_SIZE subsize; const int has_rows = (mi_row + hbs) < cm->mi_rows; const int has_cols = (mi_col + hbs) < cm->mi_cols; +#if CONFIG_SUPERTX + const int read_token = !supertx_enabled; + int skip = 0; + TX_SIZE supertx_size = b_width_log2_lookup[bsize]; + const TileInfo *const tile = &xd->tile; +#if CONFIG_EXT_TX + int txfm = DCT_DCT; +#endif // CONFIG_EXT_TX +#endif // CONFIG_SUPERTX if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; @@ -1061,40 +1729,169 @@ static void 
decode_partition(VP10Decoder *const pbi, MACROBLOCKD *const xd, partition = read_partition(cm, xd, mi_row, mi_col, r, has_rows, has_cols, n8x8_l2); subsize = subsize_lookup[partition][bsize]; // get_subsize(bsize, partition); +#if CONFIG_SUPERTX + if (!frame_is_intra_only(cm) && + partition != PARTITION_NONE && + bsize <= MAX_SUPERTX_BLOCK_SIZE && + !supertx_enabled && + !xd->lossless[0]) { + const int supertx_context = + partition_supertx_context_lookup[partition]; + supertx_enabled = vpx_read( + r, cm->fc->supertx_prob[supertx_context][supertx_size]); + if (xd->counts) + xd->counts->supertx[supertx_context][supertx_size][supertx_enabled]++; + } + if (supertx_enabled && read_token) { + int offset = mi_row * cm->mi_stride + mi_col; + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = cm->mi + offset; + set_mi_row_col(xd, tile, mi_row, num_8x8_blocks_high_lookup[bsize], + mi_col, num_8x8_blocks_wide_lookup[bsize], + cm->mi_rows, cm->mi_cols); + set_skip_context(xd, mi_row, mi_col); + // Here skip is read without using any segment level feature + skip = read_skip_without_seg(cm, xd, r); + if (skip) + reset_skip_context(xd, bsize); +#if CONFIG_EXT_TX + if (!skip) { + if (supertx_size <= TX_16X16) { + int eset = get_ext_tx_set(supertx_size, bsize, 1); + if (eset > 0) { + txfm = vpx_read_tree(r, vp10_ext_tx_inter_tree[eset], + cm->fc->inter_ext_tx_prob[eset][supertx_size]); + if (xd->counts) + ++xd->counts->inter_ext_tx[eset][supertx_size][txfm]; + } + } + } +#endif // CONFIG_EXT_TX + } +#endif // CONFIG_SUPERTX if (!hbs) { // calculate bmode block dimensions (log 2) xd->bmode_blocks_wl = 1 >> !!(partition & PARTITION_VERT); xd->bmode_blocks_hl = 1 >> !!(partition & PARTITION_HORZ); - decode_block(pbi, xd, mi_row, mi_col, r, subsize, 1, 1); + decode_block(pbi, xd, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col, r, subsize, 1, 1); } else { switch (partition) { case PARTITION_NONE: - decode_block(pbi, xd, mi_row, mi_col, r, subsize, 
n4x4_l2, n4x4_l2); + decode_block(pbi, xd, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col, r, subsize, n4x4_l2, n4x4_l2); break; case PARTITION_HORZ: - decode_block(pbi, xd, mi_row, mi_col, r, subsize, n4x4_l2, n8x8_l2); + decode_block(pbi, xd, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col, r, subsize, n4x4_l2, n8x8_l2); if (has_rows) - decode_block(pbi, xd, mi_row + hbs, mi_col, r, subsize, n4x4_l2, - n8x8_l2); + decode_block(pbi, xd, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row + hbs, mi_col, r, subsize, n4x4_l2, n8x8_l2); break; case PARTITION_VERT: - decode_block(pbi, xd, mi_row, mi_col, r, subsize, n8x8_l2, n4x4_l2); + decode_block(pbi, xd, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col, r, subsize, n8x8_l2, n4x4_l2); if (has_cols) - decode_block(pbi, xd, mi_row, mi_col + hbs, r, subsize, n8x8_l2, - n4x4_l2); + decode_block(pbi, xd, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col + hbs, r, subsize, n8x8_l2, n4x4_l2); break; case PARTITION_SPLIT: - decode_partition(pbi, xd, mi_row, mi_col, r, subsize, n8x8_l2); - decode_partition(pbi, xd, mi_row, mi_col + hbs, r, subsize, n8x8_l2); - decode_partition(pbi, xd, mi_row + hbs, mi_col, r, subsize, n8x8_l2); - decode_partition(pbi, xd, mi_row + hbs, mi_col + hbs, r, subsize, - n8x8_l2); + decode_partition(pbi, xd, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col, r, subsize, n8x8_l2); + decode_partition(pbi, xd, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col + hbs, r, subsize, n8x8_l2); + decode_partition(pbi, xd, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row + hbs, mi_col, r, subsize, n8x8_l2); + decode_partition(pbi, xd, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row + hbs, mi_col + hbs, r, subsize, n8x8_l2); break; default: 
assert(0 && "Invalid partition type"); } } +#if CONFIG_SUPERTX + if (supertx_enabled && read_token) { + uint8_t *dst_buf[3]; + int dst_stride[3], i; + + vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); + for (i = 0; i < MAX_MB_PLANE; i++) { + dst_buf[i] = xd->plane[i].dst.buf; + dst_stride[i] = xd->plane[i].dst.stride; + } + dec_predict_sb_complex(pbi, xd, tile, mi_row, mi_col, mi_row, mi_col, + bsize, bsize, dst_buf, dst_stride); + + if (!skip) { + int eobtotal = 0; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + set_offsets_topblock(cm, xd, tile, bsize, mi_row, mi_col); +#if CONFIG_EXT_TX + xd->mi[0]->mbmi.tx_type = txfm; +#endif + for (i = 0; i < MAX_MB_PLANE; ++i) { + const struct macroblockd_plane *const pd = &xd->plane[i]; + const int num_4x4_w = pd->n4_w; + const int num_4x4_h = pd->n4_h; + int row, col; + const TX_SIZE tx_size = + i ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl) + : mbmi->tx_size; + const int step = (1 << tx_size); + const int max_blocks_wide = num_4x4_w + + (xd->mb_to_right_edge >= 0 ? + 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); + const int max_blocks_high = num_4x4_h + + (xd->mb_to_bottom_edge >= 0 ? 
+ 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + + for (row = 0; row < max_blocks_high; row += step) + for (col = 0; col < max_blocks_wide; col += step) + eobtotal += reconstruct_inter_block(xd, r, mbmi, i, row, col, + tx_size); + } + if (!(subsize < BLOCK_8X8) && eobtotal == 0) + skip = 1; + } + set_param_topblock(cm, xd, bsize, mi_row, mi_col, +#if CONFIG_EXT_TX + txfm, +#endif + skip); + } +#endif // CONFIG_SUPERTX + // update partition context if (bsize >= BLOCK_8X8 && (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) @@ -1632,8 +2429,12 @@ static const uint8_t *decode_tiles(VP10Decoder *pbi, #endif for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end; mi_col += MI_BLOCK_SIZE) { - decode_partition(pbi, &tile_data->xd, mi_row, - mi_col, &tile_data->bit_reader, BLOCK_64X64, 4); + decode_partition(pbi, &tile_data->xd, +#if CONFIG_SUPERTX + 0, +#endif + mi_row, mi_col, &tile_data->bit_reader, + BLOCK_64X64, 4); } pbi->mb.corrupted |= tile_data->xd.corrupted; if (pbi->mb.corrupted) @@ -1716,6 +2517,9 @@ static int tile_worker_hook(TileWorkerData *const tile_data, for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) { decode_partition(tile_data->pbi, &tile_data->xd, +#if CONFIG_SUPERTX + 0, +#endif mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64, 4); } @@ -2214,9 +3018,25 @@ static void read_ext_tx_probs(FRAME_CONTEXT *fc, vpx_reader *r) { } #endif // CONFIG_EXT_TX +#if CONFIG_SUPERTX +static void read_supertx_probs(FRAME_CONTEXT *fc, vpx_reader *r) { + int i, j; + if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) { + for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) { + for (j = 1; j < TX_SIZES; ++j) { + vp10_diff_update_prob(r, &fc->supertx_prob[i][j]); + } + } + } +} +#endif // CONFIG_SUPERTX + static int read_compressed_header(VP10Decoder *pbi, const uint8_t *data, size_t partition_size) { VP10_COMMON *const cm = &pbi->common; +#if CONFIG_SUPERTX + MACROBLOCKD *const xd = &pbi->mb; +#endif FRAME_CONTEXT *const fc = 
cm->fc; vpx_reader r; int k, i, j; @@ -2283,6 +3103,10 @@ static int read_compressed_header(VP10Decoder *pbi, const uint8_t *data, read_mv_probs(nmvc, cm->allow_high_precision_mv, &r); #if CONFIG_EXT_TX read_ext_tx_probs(fc, &r); +#endif +#if CONFIG_SUPERTX + if (!xd->lossless[0]) + read_supertx_probs(fc, &r); #endif } diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c index 392388a20..f43620316 100644 --- a/vp10/decoder/decodemv.c +++ b/vp10/decoder/decodemv.c @@ -929,98 +929,117 @@ static void read_inter_block_mode_info(VP10Decoder *const pbi, static void read_inter_frame_mode_info(VP10Decoder *const pbi, MACROBLOCKD *const xd, +#if CONFIG_SUPERTX + int supertx_enabled, +#endif // CONFIG_SUPERTX int mi_row, int mi_col, vpx_reader *r) { VP10_COMMON *const cm = &pbi->common; MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; - int inter_block; + int inter_block = 1; #if CONFIG_VAR_TX BLOCK_SIZE bsize = mbmi->sb_type; -#endif +#endif // CONFIG_VAR_TX +#if CONFIG_SUPERTX + (void) supertx_enabled; +#endif // CONFIG_SUPERTX mbmi->mv[0].as_int = 0; mbmi->mv[1].as_int = 0; mbmi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r); - mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r); - inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r); +#if CONFIG_SUPERTX + if (!supertx_enabled) { +#endif // CONFIG_SUPERTX + mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r); + inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r); #if CONFIG_VAR_TX - xd->above_txfm_context = cm->above_txfm_context + mi_col; - xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07); - if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT && - !mbmi->skip && inter_block) { - const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; - const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size]; - const int bs = num_4x4_blocks_wide_lookup[txb_size]; - const int width = num_4x4_blocks_wide_lookup[bsize]; - const int height = 
num_4x4_blocks_high_lookup[bsize]; - int idx, idy; - for (idy = 0; idy < height; idy += bs) - for (idx = 0; idx < width; idx += bs) - read_tx_size_inter(cm, xd, mbmi, xd->counts, max_tx_size, - idy, idx, r); - if (xd->counts) { - const int ctx = get_tx_size_context(xd); - ++get_tx_counts(max_tx_size, ctx, &xd->counts->tx)[mbmi->tx_size]; - } - } else { - mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r); - if (inter_block) { + xd->above_txfm_context = cm->above_txfm_context + mi_col; + xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07); + if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT && + !mbmi->skip && inter_block) { + const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; + const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size]; + const int bs = num_4x4_blocks_wide_lookup[txb_size]; const int width = num_4x4_blocks_wide_lookup[bsize]; const int height = num_4x4_blocks_high_lookup[bsize]; int idx, idy; - for (idy = 0; idy < height; ++idy) - for (idx = 0; idx < width; ++idx) - mbmi->inter_tx_size[(idy >> 1) * 8 + (idx >> 1)] = mbmi->tx_size; - } + for (idy = 0; idy < height; idy += bs) + for (idx = 0; idx < width; idx += bs) + read_tx_size_inter(cm, xd, mbmi, xd->counts, max_tx_size, + idy, idx, r); + if (xd->counts) { + const int ctx = get_tx_size_context(xd); + ++get_tx_counts(max_tx_size, ctx, &xd->counts->tx)[mbmi->tx_size]; + } + } else { + mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r); + if (inter_block) { + const int width = num_4x4_blocks_wide_lookup[bsize]; + const int height = num_4x4_blocks_high_lookup[bsize]; + int idx, idy; + for (idy = 0; idy < height; ++idy) + for (idx = 0; idx < width; ++idx) + mbmi->inter_tx_size[(idy >> 1) * 8 + (idx >> 1)] = mbmi->tx_size; + } - set_txfm_ctx(xd->left_txfm_context, mbmi->tx_size, xd->n8_h); - set_txfm_ctx(xd->above_txfm_context, mbmi->tx_size, xd->n8_w); - } + set_txfm_ctx(xd->left_txfm_context, mbmi->tx_size, xd->n8_h); + 
set_txfm_ctx(xd->above_txfm_context, mbmi->tx_size, xd->n8_w); + } #else - mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r); -#endif + mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r); +#endif // CONFIG_VAR_TX +#if CONFIG_SUPERTX + } +#endif // CONFIG_SUPERTX if (inter_block) - read_inter_block_mode_info(pbi, xd, mi, mi_row, mi_col, r); + read_inter_block_mode_info(pbi, xd, + mi, mi_row, mi_col, r); else read_intra_block_mode_info(cm, xd, mi, r); #if CONFIG_EXT_TX - if (get_ext_tx_types(mbmi->tx_size, mbmi->sb_type, inter_block) > 1 && - cm->base_qindex > 0 && !mbmi->skip && - !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { - int eset = get_ext_tx_set(mbmi->tx_size, mbmi->sb_type, - inter_block); - FRAME_COUNTS *counts = xd->counts; + if (get_ext_tx_types(mbmi->tx_size, mbmi->sb_type, inter_block) > 1 && + cm->base_qindex > 0 && !mbmi->skip && +#if CONFIG_SUPERTX + !supertx_enabled && +#endif // CONFIG_SUPERTX + !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { + int eset = get_ext_tx_set(mbmi->tx_size, mbmi->sb_type, + inter_block); + FRAME_COUNTS *counts = xd->counts; - if (inter_block) { - if (eset > 0) { - mbmi->tx_type = - vpx_read_tree(r, vp10_ext_tx_inter_tree[eset], - cm->fc->inter_ext_tx_prob[eset][mbmi->tx_size]); - if (counts) - ++counts->inter_ext_tx[eset][mbmi->tx_size][mbmi->tx_type]; - } - } else if (ALLOW_INTRA_EXT_TX) { - if (eset > 0) { - mbmi->tx_type = vpx_read_tree(r, vp10_ext_tx_intra_tree[eset], - cm->fc->intra_ext_tx_prob[eset] + if (inter_block) { + if (eset > 0) { + mbmi->tx_type = + vpx_read_tree(r, vp10_ext_tx_inter_tree[eset], + cm->fc->inter_ext_tx_prob[eset][mbmi->tx_size]); + if (counts) + ++counts->inter_ext_tx[eset][mbmi->tx_size][mbmi->tx_type]; + } + } else if (ALLOW_INTRA_EXT_TX) { + if (eset > 0) { + mbmi->tx_type = vpx_read_tree(r, vp10_ext_tx_intra_tree[eset], + cm->fc->intra_ext_tx_prob[eset] [mbmi->tx_size][mbmi->mode]); - if (counts) - 
++counts->intra_ext_tx[eset][mbmi->tx_size] - [mbmi->mode][mbmi->tx_type]; - } + if (counts) + ++counts->intra_ext_tx[eset][mbmi->tx_size] + [mbmi->mode][mbmi->tx_type]; } - } else { - mbmi->tx_type = DCT_DCT; } + } else { + mbmi->tx_type = DCT_DCT; + } #endif // CONFIG_EXT_TX } void vp10_read_mode_info(VP10Decoder *const pbi, MACROBLOCKD *xd, - int mi_row, int mi_col, vpx_reader *r, - int x_mis, int y_mis) { +#if CONFIG_SUPERTX + int supertx_enabled, +#endif // CONFIG_SUPERTX + int mi_row, int mi_col, vpx_reader *r, + int x_mis, int y_mis) { VP10_COMMON *const cm = &pbi->common; MODE_INFO *const mi = xd->mi[0]; MV_REF* frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; @@ -1039,7 +1058,11 @@ void vp10_read_mode_info(VP10Decoder *const pbi, MACROBLOCKD *xd, } #endif } else { - read_inter_frame_mode_info(pbi, xd, mi_row, mi_col, r); + read_inter_frame_mode_info(pbi, xd, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col, r); for (h = 0; h < y_mis; ++h) { MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; for (w = 0; w < x_mis; ++w) { diff --git a/vp10/decoder/decodemv.h b/vp10/decoder/decodemv.h index 6653be5f6..959a0010f 100644 --- a/vp10/decoder/decodemv.h +++ b/vp10/decoder/decodemv.h @@ -20,8 +20,12 @@ extern "C" { #endif void vp10_read_mode_info(VP10Decoder *const pbi, MACROBLOCKD *xd, - int mi_row, int mi_col, vpx_reader *r, - int x_mis, int y_mis); +#if CONFIG_SUPERTX + int supertx_enabled, +#endif + + int mi_row, int mi_col, vpx_reader *r, + int x_mis, int y_mis); #ifdef __cplusplus } // extern "C" diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c index 30ca78bdb..31f636ade 100644 --- a/vp10/encoder/bitstream.c +++ b/vp10/encoder/bitstream.c @@ -96,6 +96,16 @@ void vp10_encode_token_init() { #endif // CONFIG_EXT_TX } +#if CONFIG_SUPERTX +static int vp10_check_supertx(VP10_COMMON *cm, int mi_row, int mi_col, + BLOCK_SIZE bsize) { + MODE_INFO *mi; + mi = cm->mi + (mi_row * cm->mi_stride + mi_col); + 
return mi[0].mbmi.tx_size == max_txsize_lookup[bsize] && + mi[0].mbmi.sb_type < bsize; +} +#endif // CONFIG_SUPERTX + static void write_intra_mode(vpx_writer *w, PREDICTION_MODE mode, const vpx_prob *probs) { vp10_write_token(w, vp10_intra_mode_tree, probs, &intra_mode_encodings[mode]); @@ -357,6 +367,32 @@ static void pack_palette_tokens(vpx_writer *w, TOKENEXTRA **tp, *tp = p; } +#if CONFIG_SUPERTX +static void update_supertx_probs(VP10_COMMON *cm, vpx_writer *w) { + const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) - + vp10_cost_zero(GROUP_DIFF_UPDATE_PROB); + int i, j; + int savings = 0; + int do_update = 0; + for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) { + for (j = 1; j < TX_SIZES; ++j) { + savings += vp10_cond_prob_diff_update_savings(&cm->fc->supertx_prob[i][j], + cm->counts.supertx[i][j]); + } + } + do_update = savings > savings_thresh; + vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB); + if (do_update) { + for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) { + for (j = 1; j < TX_SIZES; ++j) { + vp10_cond_prob_diff_update(w, &cm->fc->supertx_prob[i][j], + cm->counts.supertx[i][j]); + } + } + } +} +#endif // CONFIG_SUPERTX + static void pack_mb_tokens(vpx_writer *w, TOKENEXTRA **tp, const TOKENEXTRA *const stop, vpx_bit_depth_t bit_depth, const TX_SIZE tx) { @@ -628,6 +664,9 @@ static void write_switchable_interp_filter(VP10_COMP *cpi, } static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi, +#if CONFIG_SUPERTX + int supertx_enabled, +#endif vpx_writer *w) { VP10_COMMON *const cm = &cpi->common; const nmv_context *nmvc = &cm->fc->nmvc; @@ -657,12 +696,25 @@ static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi, } } +#if CONFIG_SUPERTX + if (supertx_enabled) + skip = mbmi->skip; + else + skip = write_skip(cm, xd, segment_id, mi, w); +#else skip = write_skip(cm, xd, segment_id, mi, w); +#endif // CONFIG_SUPERTX - if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) - vpx_write(w, is_inter, 
vp10_get_intra_inter_prob(cm, xd)); +#if CONFIG_SUPERTX + if (!supertx_enabled) +#endif // CONFIG_SUPERTX + if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) + vpx_write(w, is_inter, vp10_get_intra_inter_prob(cm, xd)); if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT && +#if CONFIG_SUPERTX + !supertx_enabled && +#endif // CONFIG_SUPERTX !(is_inter && skip)) { #if CONFIG_VAR_TX if (is_inter) { // This implies skip flag is 0. @@ -898,6 +950,9 @@ static void write_mb_modes_kf(const VP10_COMMON *cm, const MACROBLOCKD *xd, static void write_modes_b(VP10_COMP *cpi, const TileInfo *const tile, vpx_writer *w, TOKENEXTRA **tok, const TOKENEXTRA *const tok_end, +#if CONFIG_SUPERTX + int supertx_enabled, +#endif int mi_row, int mi_col) { const VP10_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; @@ -920,7 +975,11 @@ static void write_modes_b(VP10_COMP *cpi, const TileInfo *const tile, xd->above_txfm_context = cm->above_txfm_context + mi_col; xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07); #endif - pack_inter_mode_mvs(cpi, m, w); + pack_inter_mode_mvs(cpi, m, +#if CONFIG_SUPERTX + supertx_enabled, +#endif + w); } if (m->mbmi.palette_mode_info.palette_size[0] > 0) { @@ -930,6 +989,10 @@ static void write_modes_b(VP10_COMP *cpi, const TileInfo *const tile, assert(*tok < tok_end); } +#if CONFIG_SUPERTX + if (supertx_enabled) return; +#endif // CONFIG_SUPERTX + if (!m->mbmi.skip) { assert(*tok < tok_end); for (plane = 0; plane < MAX_MB_PLANE; ++plane) { @@ -971,7 +1034,7 @@ static void write_modes_b(VP10_COMP *cpi, const TileInfo *const tile, TX_SIZE tx = plane ? 
get_uv_tx_size(&m->mbmi, &xd->plane[plane]) : m->mbmi.tx_size; pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx); -#endif +#endif // CONFIG_VAR_TX assert(*tok < tok_end && (*tok)->token == EOSB_TOKEN); (*tok)++; } @@ -1003,6 +1066,9 @@ static void write_partition(const VP10_COMMON *const cm, static void write_modes_sb(VP10_COMP *cpi, const TileInfo *const tile, vpx_writer *w, TOKENEXTRA **tok, const TOKENEXTRA *const tok_end, +#if CONFIG_SUPERTX + int supertx_enabled, +#endif int mi_row, int mi_col, BLOCK_SIZE bsize) { const VP10_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; @@ -1011,7 +1077,12 @@ static void write_modes_sb(VP10_COMP *cpi, const int bs = (1 << bsl) / 4; PARTITION_TYPE partition; BLOCK_SIZE subsize; - const MODE_INFO *m = NULL; + MODE_INFO *m = NULL; +#if CONFIG_SUPERTX + const int pack_token = !supertx_enabled; + TX_SIZE supertx_size; + int plane; +#endif if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; @@ -1021,36 +1092,118 @@ static void write_modes_sb(VP10_COMP *cpi, partition = partition_lookup[bsl][m->mbmi.sb_type]; write_partition(cm, xd, bs, mi_row, mi_col, partition, bsize, w); subsize = get_subsize(bsize, partition); +#if CONFIG_SUPERTX + xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col); + set_mi_row_col(xd, tile, + mi_row, num_8x8_blocks_high_lookup[bsize], + mi_col, num_8x8_blocks_wide_lookup[bsize], + cm->mi_rows, cm->mi_cols); + if (!supertx_enabled && + !frame_is_intra_only(cm) && + partition != PARTITION_NONE && bsize <= MAX_SUPERTX_BLOCK_SIZE && + !xd->lossless[0]) { + vpx_prob prob; + supertx_size = max_txsize_lookup[bsize]; + prob = cm->fc->supertx_prob[partition_supertx_context_lookup[partition]] + [supertx_size]; + supertx_enabled = (xd->mi[0]->mbmi.tx_size == supertx_size); + vpx_write(w, supertx_enabled, prob); + if (supertx_enabled) { + vpx_write(w, xd->mi[0]->mbmi.skip, vp10_get_skip_prob(cm, xd)); +#if CONFIG_EXT_TX + if (supertx_size <= TX_16X16 && 
!xd->mi[0]->mbmi.skip) { + int eset = get_ext_tx_set(supertx_size, bsize, 1); + if (eset > 0) { + vp10_write_token( + w, vp10_ext_tx_inter_tree[eset], + cm->fc->inter_ext_tx_prob[eset][supertx_size], + &ext_tx_inter_encodings[eset][xd->mi[0]->mbmi.tx_type]); + } + } +#endif // CONFIG_EXT_TX + } + } +#endif // CONFIG_SUPERTX if (subsize < BLOCK_8X8) { - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); + write_modes_b(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col); } else { switch (partition) { case PARTITION_NONE: - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); + write_modes_b(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col); break; case PARTITION_HORZ: - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); + write_modes_b(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col); if (mi_row + bs < cm->mi_rows) - write_modes_b(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col); + write_modes_b(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row + bs, mi_col); break; case PARTITION_VERT: - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); + write_modes_b(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col); if (mi_col + bs < cm->mi_cols) - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + bs); + write_modes_b(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col + bs); break; case PARTITION_SPLIT: - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, subsize); - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col + bs, - subsize); - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col, - subsize); - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col + 
bs, - subsize); + write_modes_sb(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col, subsize); + write_modes_sb(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row, mi_col + bs, subsize); + write_modes_sb(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row + bs, mi_col, subsize); + write_modes_sb(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + supertx_enabled, +#endif // CONFIG_SUPERTX + mi_row + bs, mi_col + bs, subsize); break; default: assert(0); } } +#if CONFIG_SUPERTX + if (partition != PARTITION_NONE && supertx_enabled && pack_token && + !m->mbmi.skip) { + assert(*tok < tok_end); + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + TX_SIZE tx = plane ? get_uv_tx_size(&m->mbmi, &xd->plane[plane]) + : m->mbmi.tx_size; + pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx); + assert(*tok < tok_end && (*tok)->token == EOSB_TOKEN); + (*tok)++; + } + } +#endif // CONFIG_SUPERTX // update partition context if (bsize >= BLOCK_8X8 && @@ -1072,8 +1225,11 @@ static void write_modes(VP10_COMP *cpi, #endif for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, - BLOCK_64X64); + write_modes_sb(cpi, tile, w, tok, tok_end, +#if CONFIG_SUPERTX + 0, +#endif + mi_row, mi_col, BLOCK_64X64); } } @@ -1762,6 +1918,9 @@ static void write_uncompressed_header(VP10_COMP *cpi, static size_t write_compressed_header(VP10_COMP *cpi, uint8_t *data) { VP10_COMMON *const cm = &cpi->common; +#if CONFIG_SUPERTX + MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; +#endif // CONFIG_SUPERTX FRAME_CONTEXT *const fc = cm->fc; FRAME_COUNTS *counts = cpi->td.counts; vpx_writer header_bc; @@ -1843,6 +2002,10 @@ static size_t write_compressed_header(VP10_COMP *cpi, uint8_t *data) { #if CONFIG_EXT_TX update_ext_tx_probs(cm, &header_bc); #endif // CONFIG_EXT_TX +#if 
CONFIG_SUPERTX + if (!xd->lossless[0]) + update_supertx_probs(cm, &header_bc); +#endif // CONFIG_SUPERTX } vpx_stop_encode(&header_bc); diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c index daf058e34..a7bce1da7 100644 --- a/vp10/encoder/encodeframe.c +++ b/vp10/encoder/encodeframe.c @@ -36,6 +36,9 @@ #include "vp10/encoder/aq_complexity.h" #include "vp10/encoder/aq_cyclicrefresh.h" #include "vp10/encoder/aq_variance.h" +#if CONFIG_SUPERTX +#include "vp10/encoder/cost.h" +#endif #include "vp10/encoder/encodeframe.h" #include "vp10/encoder/encodemb.h" #include "vp10/encoder/encodemv.h" @@ -51,6 +54,40 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData * td, int mi_row, int mi_col, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx); +#if CONFIG_SUPERTX +static int check_intra_b(PICK_MODE_CONTEXT *ctx); + +static int check_intra_sb(VP10_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize, + PC_TREE *pc_tree); +static void predict_superblock(VP10_COMP *cpi, ThreadData *td, + int mi_row_pred, int mi_col_pred, + BLOCK_SIZE bsize_pred, int b_sub8x8, int block); +static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size, + PC_TREE *pc_tree); +static void predict_sb_complex(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int mi_row, int mi_col, + int mi_row_ori, int mi_col_ori, + int output_enabled, BLOCK_SIZE bsize, + BLOCK_SIZE top_bsize, + uint8_t *dst_buf[3], int dst_stride[3], + PC_TREE *pc_tree); +static void update_state_sb_supertx(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int mi_row, int mi_col, + BLOCK_SIZE bsize, + int output_enabled, PC_TREE *pc_tree); +static void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize, + int *tmp_rate, int64_t *tmp_dist, +#if CONFIG_EXT_TX + TX_TYPE *best_tx, +#endif + PC_TREE *pc_tree); +#endif // CONFIG_SUPERTX + // This is used as a reference when computing the source 
variance for the // purposes of activity masking. // Eventually this should be replaced by custom no-reference routines, @@ -170,11 +207,11 @@ static BLOCK_SIZE get_rd_var_based_fixed_partition(VP10_COMP *cpi, // Lighter version of set_offsets that only sets the mode info // pointers. -static INLINE void set_mode_info_offsets(VP10_COMP *const cpi, - MACROBLOCK *const x, - MACROBLOCKD *const xd, - int mi_row, - int mi_col) { +static void set_mode_info_offsets(VP10_COMP *const cpi, + MACROBLOCK *const x, + MACROBLOCKD *const xd, + int mi_row, + int mi_col) { VP10_COMMON *const cm = &cpi->common; const int idx_str = xd->mi_stride * mi_row + mi_col; xd->mi = cm->mi_grid_visible + idx_str; @@ -245,6 +282,80 @@ static void set_offsets(VP10_COMP *cpi, const TileInfo *const tile, xd->tile = *tile; } +#if CONFIG_SUPERTX +static void set_offsets_supertx(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize) { + MACROBLOCK *const x = &td->mb; + VP10_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + + set_mode_info_offsets(cpi, x, xd, mi_row, mi_col); + + // Set up distance of MB to edge of frame in 1/8th pel units. 
+ assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1))); + set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, + cm->mi_rows, cm->mi_cols); +} + +static void set_offsets_extend(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int mi_row_pred, int mi_col_pred, + int mi_row_ori, int mi_col_ori, + BLOCK_SIZE bsize_pred, BLOCK_SIZE bsize_ori) { + // Used in supertx + // (mi_row_ori, mi_col_ori, bsize_ori): region for mv + // (mi_row_pred, mi_col_pred, bsize_pred): region to predict + MACROBLOCK *const x = &td->mb; + VP10_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *mbmi; + const int mi_width = num_8x8_blocks_wide_lookup[bsize_pred]; + const int mi_height = num_8x8_blocks_high_lookup[bsize_pred]; + const struct segmentation *const seg = &cm->seg; + + set_mode_info_offsets(cpi, x, xd, mi_row_ori, mi_col_ori); + + mbmi = &xd->mi[0]->mbmi; + + // Set up limit values for MV components. + // Mv beyond the range do not produce new/different prediction block. + x->mv_row_min = -(((mi_row_pred + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND); + x->mv_col_min = -(((mi_col_pred + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND); + x->mv_row_max = (cm->mi_rows - mi_row_pred) * MI_SIZE + VP9_INTERP_EXTEND; + x->mv_col_max = (cm->mi_cols - mi_col_pred) * MI_SIZE + VP9_INTERP_EXTEND; + + // Set up distance of MB to edge of frame in 1/8th pel units. + assert(!(mi_col_pred & (mi_width - 1)) && !(mi_row_pred & (mi_height - 1))); + set_mi_row_col(xd, tile, mi_row_pred, mi_height, mi_col_pred, mi_width, + cm->mi_rows, cm->mi_cols); + xd->up_available = (mi_row_ori != 0); + xd->left_available = (mi_col_ori > tile->mi_col_start); + + // R/D setup. + x->rddiv = cpi->rd.RDDIV; + x->rdmult = cpi->rd.RDMULT; + + // Setup segment ID. + if (seg->enabled) { + if (cpi->oxcf.aq_mode != VARIANCE_AQ) { + const uint8_t *const map = seg->update_map ? 
cpi->segmentation_map + : cm->last_frame_seg_map; + mbmi->segment_id = get_segment_id(cm, map, bsize_ori, + mi_row_ori, mi_col_ori); + } + vp10_init_plane_quantizers(cpi, x); + + x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id]; + } else { + mbmi->segment_id = 0; + x->encode_breakout = cpi->encode_breakout; + } +} +#endif // CONFIG_SUPERTX + static void set_block_size(VP10_COMP * const cpi, MACROBLOCK *const x, MACROBLOCKD *const xd, @@ -973,7 +1084,9 @@ static void update_state(VP10_COMP *cpi, ThreadData *td, const int mi_height = num_8x8_blocks_high_lookup[bsize]; int max_plane; +#if !CONFIG_SUPERTX assert(mi->mbmi.sb_type == bsize); +#endif *mi_addr = *mi; *x->mbmi_ext = ctx->mbmi_ext; @@ -991,8 +1104,8 @@ static void update_state(VP10_COMP *cpi, ThreadData *td, // and then update the quantizer. if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { vp10_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, mi_row, - mi_col, bsize, ctx->rate, ctx->dist, - x->skip); + mi_col, bsize, ctx->rate, ctx->dist, + x->skip); } } @@ -1099,6 +1212,285 @@ static void update_state(VP10_COMP *cpi, ThreadData *td, } } +#if CONFIG_SUPERTX +static void update_state_supertx(VP10_COMP *cpi, ThreadData *td, + PICK_MODE_CONTEXT *ctx, + int mi_row, int mi_col, BLOCK_SIZE bsize, + int output_enabled) { + int i, y, x_idx; + VP10_COMMON *const cm = &cpi->common; + RD_COUNTS *const rdc = &td->rd_counts; + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO *mi = &ctx->mic; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + MODE_INFO *mi_addr = xd->mi[0]; + const struct segmentation *const seg = &cm->seg; + const int mis = cm->mi_stride; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int x_mis = VPXMIN(mi_width, cm->mi_cols - mi_col); + const int y_mis = VPXMIN(mi_height, cm->mi_rows - mi_row); + MV_REF *const frame_mvs = + cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; + 
int w, h; + + *mi_addr = *mi; + *x->mbmi_ext = ctx->mbmi_ext; + assert(is_inter_block(mbmi)); + + // If segmentation in use + if (seg->enabled && output_enabled) { + // For in frame complexity AQ copy the segment id from the segment map. + if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { + const uint8_t *const map = seg->update_map ? cpi->segmentation_map + : cm->last_frame_seg_map; + mi_addr->mbmi.segment_id = + get_segment_id(cm, map, bsize, mi_row, mi_col); + } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + // Else for cyclic refresh mode update the segment map, set the segment id + // and then update the quantizer. + vp10_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, + mi_row, mi_col, bsize, + ctx->rate, ctx->dist, 1); + vp10_init_plane_quantizers(cpi, x); + } + } + + // Restore the coding context of the MB to that that was in place + // when the mode was picked for it + for (y = 0; y < mi_height; y++) + for (x_idx = 0; x_idx < mi_width; x_idx++) + if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx + && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) { + xd->mi[x_idx + y * mis] = mi_addr; + } + + if (cpi->oxcf.aq_mode) + vp10_init_plane_quantizers(cpi, x); + + if (is_inter_block(mbmi) && mbmi->sb_type < BLOCK_8X8) { + mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; + mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; + } + + x->skip = ctx->skip; + memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk, + sizeof(uint8_t) * ctx->num_4x4_blk); + + if (!output_enabled) + return; + + if (!frame_is_intra_only(cm)) { + vp10_update_mv_count(td); + + if (cm->interp_filter == SWITCHABLE +#if CONFIG_EXT_INTERP + && vp10_is_interp_needed(xd) +#endif + ) { + const int ctx = vp10_get_pred_context_switchable_interp(xd); + ++td->counts->switchable_interp[ctx][mbmi->interp_filter]; + } + + rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff; + rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff; + 
rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff; + + for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) + rdc->filter_diff[i] += ctx->best_filter_diff[i]; + } + + for (h = 0; h < y_mis; ++h) { + MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; + for (w = 0; w < x_mis; ++w) { + MV_REF *const mv = frame_mv + w; + mv->ref_frame[0] = mi->mbmi.ref_frame[0]; + mv->ref_frame[1] = mi->mbmi.ref_frame[1]; + mv->mv[0].as_int = mi->mbmi.mv[0].as_int; + mv->mv[1].as_int = mi->mbmi.mv[1].as_int; + } + } +} + +static void update_state_sb_supertx(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int mi_row, int mi_col, + BLOCK_SIZE bsize, + int output_enabled, PC_TREE *pc_tree) { + VP10_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; + struct macroblock_plane *const p = x->plane; + struct macroblockd_plane *const pd = xd->plane; + int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; + PARTITION_TYPE partition = pc_tree->partitioning; + BLOCK_SIZE subsize = get_subsize(bsize, partition); + int i; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + switch (partition) { + case PARTITION_NONE: + set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize); + update_state_supertx(cpi, td, &pc_tree->none, mi_row, mi_col, + subsize, output_enabled); + break; + case PARTITION_VERT: + set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize); + update_state_supertx(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, + subsize, output_enabled); + if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { + set_offsets_supertx(cpi, td, tile, mi_row, mi_col + hbs, subsize); + update_state_supertx(cpi, td, &pc_tree->vertical[1], + mi_row, mi_col + hbs, subsize, output_enabled); + } + break; + case PARTITION_HORZ: + set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize); + update_state_supertx(cpi, td, &pc_tree->horizontal[0], mi_row, mi_col, + subsize, output_enabled); + if 
(mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { + set_offsets_supertx(cpi, td, tile, mi_row + hbs, mi_col, subsize); + update_state_supertx(cpi, td, &pc_tree->horizontal[1], mi_row + hbs, + mi_col, subsize, output_enabled); + } + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) { + set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize); + update_state_supertx(cpi, td, pc_tree->leaf_split[0], mi_row, mi_col, + subsize, output_enabled); + } else { + set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize); + update_state_sb_supertx(cpi, td, tile, mi_row, mi_col, subsize, + output_enabled, pc_tree->split[0]); + set_offsets_supertx(cpi, td, tile, mi_row, mi_col + hbs, subsize); + update_state_sb_supertx(cpi, td, tile, mi_row, mi_col + hbs, subsize, + output_enabled, pc_tree->split[1]); + set_offsets_supertx(cpi, td, tile, mi_row + hbs, mi_col, subsize); + update_state_sb_supertx(cpi, td, tile, mi_row + hbs, mi_col, subsize, + output_enabled, pc_tree->split[2]); + set_offsets_supertx(cpi, td, tile, mi_row + hbs, mi_col + hbs, subsize); + update_state_sb_supertx(cpi, td, tile, mi_row + hbs, mi_col + hbs, + subsize, output_enabled, pc_tree->split[3]); + } + break; + default: + assert(0); + } + + for (i = 0; i < MAX_MB_PLANE; ++i) { + p[i].coeff = (&pc_tree->none)->coeff_pbuf[i][1]; + p[i].qcoeff = (&pc_tree->none)->qcoeff_pbuf[i][1]; + pd[i].dqcoeff = (&pc_tree->none)->dqcoeff_pbuf[i][1]; + p[i].eobs = (&pc_tree->none)->eobs_pbuf[i][1]; + } +} + +static void update_supertx_param(ThreadData *td, + PICK_MODE_CONTEXT *ctx, +#if CONFIG_EXT_TX + int best_tx, +#endif + TX_SIZE supertx_size) { + MACROBLOCK *const x = &td->mb; + + ctx->mic.mbmi.tx_size = supertx_size; + memcpy(ctx->zcoeff_blk, x->zcoeff_blk[supertx_size], + sizeof(uint8_t) * ctx->num_4x4_blk); + ctx->skip = x->skip; +#if CONFIG_EXT_TX + ctx->mic.mbmi.tx_type = best_tx; +#endif // CONFIG_EXT_TX +} + +static void update_supertx_param_sb(VP10_COMP *cpi, ThreadData *td, + int mi_row, int mi_col, + 
BLOCK_SIZE bsize, +#if CONFIG_EXT_TX + int best_tx, +#endif + TX_SIZE supertx_size, PC_TREE *pc_tree) { + VP10_COMMON *const cm = &cpi->common; + int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; + PARTITION_TYPE partition = pc_tree->partitioning; + BLOCK_SIZE subsize = get_subsize(bsize, partition); + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + switch (partition) { + case PARTITION_NONE: + update_supertx_param(td, &pc_tree->none, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + break; + case PARTITION_VERT: + update_supertx_param(td, &pc_tree->vertical[0], +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) + update_supertx_param(td, &pc_tree->vertical[1], +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + break; + case PARTITION_HORZ: + update_supertx_param(td, &pc_tree->horizontal[0], +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) + update_supertx_param(td, &pc_tree->horizontal[1], +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) { + update_supertx_param(td, pc_tree->leaf_split[0], +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size); + } else { + update_supertx_param_sb(cpi, td, mi_row, mi_col, subsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree->split[0]); + update_supertx_param_sb(cpi, td, mi_row, mi_col + hbs, subsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree->split[1]); + update_supertx_param_sb(cpi, td, mi_row + hbs, mi_col, subsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree->split[2]); + update_supertx_param_sb(cpi, td, mi_row + hbs, mi_col + hbs, subsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree->split[3]); + } + break; + default: + assert(0); + } +} +#endif // CONFIG_SUPERTX + void vp10_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG 
*src, int mi_row, int mi_col) { uint8_t *const buffers[3] = {src->y_buffer, src->u_buffer, src->v_buffer }; @@ -1130,6 +1522,9 @@ static void rd_pick_sb_modes(VP10_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *const x, int mi_row, int mi_col, RD_COST *rd_cost, +#if CONFIG_SUPERTX + int *totalrate_nocoef, +#endif BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd) { VP10_COMMON *const cm = &cpi->common; @@ -1222,17 +1617,30 @@ static void rd_pick_sb_modes(VP10_COMP *cpi, // as a predictor for MBs that follow in the SB if (frame_is_intra_only(cm)) { vp10_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd); +#if CONFIG_SUPERTX + *totalrate_nocoef = 0; +#endif // CONFIG_SUPERTX } else { if (bsize >= BLOCK_8X8) { - if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) + if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { vp10_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize, ctx, best_rd); - else - vp10_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, - rd_cost, bsize, ctx, best_rd); +#if CONFIG_SUPERTX + *totalrate_nocoef = rd_cost->rate; +#endif // CONFIG_SUPERTX + } else { + vp10_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost, +#if CONFIG_SUPERTX + totalrate_nocoef, +#endif // CONFIG_SUPERTX + bsize, ctx, best_rd); + } } else { - vp10_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col, - rd_cost, bsize, ctx, best_rd); + vp10_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col, rd_cost, +#if CONFIG_SUPERTX + totalrate_nocoef, +#endif // CONFIG_SUPERTX + bsize, ctx, best_rd); } } @@ -1285,7 +1693,11 @@ static void update_inter_mode_stats(FRAME_COUNTS *counts, } #endif -static void update_stats(VP10_COMMON *cm, ThreadData *td) { +static void update_stats(VP10_COMMON *cm, ThreadData *td +#if CONFIG_SUPERTX + , int supertx_enabled +#endif + ) { const MACROBLOCK *x = &td->mb; const MACROBLOCKD *const xd = &x->e_mbd; const MODE_INFO *const mi = xd->mi[0]; @@ -1299,6 +1711,9 
@@ static void update_stats(VP10_COMMON *cm, ThreadData *td) { const int seg_ref_active = segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME); if (!seg_ref_active) { +#if CONFIG_SUPERTX + if (!supertx_enabled) +#endif counts->intra_inter[vp10_get_intra_inter_context(xd)][inter_block]++; // If the segment reference feature is enabled we have only a single // reference frame allowed for the segment so exclude it from @@ -1482,7 +1897,11 @@ static void encode_b(VP10_COMP *cpi, const TileInfo *const tile, encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx); if (output_enabled) { +#if CONFIG_SUPERTX + update_stats(&cpi->common, td, 0); +#else update_stats(&cpi->common, td); +#endif } } @@ -1515,6 +1934,81 @@ static void encode_sb(VP10_COMP *cpi, ThreadData *td, if (output_enabled && bsize != BLOCK_4X4) td->counts->partition[ctx][partition]++; +#if CONFIG_SUPERTX + if (!frame_is_intra_only(cm) && + bsize <= MAX_SUPERTX_BLOCK_SIZE && + partition != PARTITION_NONE && + !xd->lossless[0]) { + int supertx_enabled; + TX_SIZE supertx_size = max_txsize_lookup[bsize]; + supertx_enabled = check_supertx_sb(bsize, supertx_size, pc_tree); + if (supertx_enabled) { + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + int x_idx, y_idx, i; + uint8_t *dst_buf[3]; + int dst_stride[3]; + set_skip_context(xd, mi_row, mi_col); + set_mode_info_offsets(cpi, x, xd, mi_row, mi_col); + update_state_sb_supertx(cpi, td, tile, mi_row, mi_col, bsize, + output_enabled, pc_tree); + + vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), + mi_row, mi_col); + for (i = 0; i < MAX_MB_PLANE; i++) { + dst_buf[i] = xd->plane[i].dst.buf; + dst_stride[i] = xd->plane[i].dst.stride; + } + predict_sb_complex(cpi, td, tile, mi_row, mi_col, mi_row, mi_col, + output_enabled, bsize, bsize, + dst_buf, dst_stride, pc_tree); + + set_offsets(cpi, tile, x, mi_row, mi_col, bsize); + if (!x->skip) { + 
xd->mi[0]->mbmi.skip = 1; + vp10_encode_sb_supertx(x, bsize); + vp10_tokenize_sb_supertx(cpi, td, tp, !output_enabled, bsize); + } else { + xd->mi[0]->mbmi.skip = 1; + if (output_enabled) + td->counts->skip[vp10_get_skip_context(xd)][1]++; + reset_skip_context(xd, bsize); + } + if (output_enabled) { + for (y_idx = 0; y_idx < mi_height; y_idx++) + for (x_idx = 0; x_idx < mi_width; x_idx++) { + if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx + && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height + > y_idx) { + xd->mi[x_idx + y_idx * cm->mi_stride]->mbmi.skip = + xd->mi[0]->mbmi.skip; + } + } + td->counts->supertx + [partition_supertx_context_lookup[partition]][supertx_size][1]++; + td->counts->supertx_size[supertx_size]++; +#if CONFIG_EXT_TX + if (supertx_size <= TX_16X16 && !xd->mi[0]->mbmi.skip) { + int eset = get_ext_tx_types(supertx_size, bsize, 1); + if (eset > 0) { + ++td->counts->inter_ext_tx[eset][supertx_size] + [xd->mi[0]->mbmi.tx_type]; + } + } +#endif // CONFIG_EXT_TX + } + if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) + update_partition_context(xd, mi_row, mi_col, subsize, bsize); + return; + } else { + if (output_enabled) { + td->counts->supertx + [partition_supertx_context_lookup[partition]][supertx_size][0]++; + } + } + } +#endif // CONFIG_SUPERTX + switch (partition) { case PARTITION_NONE: encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, @@ -1639,6 +2133,9 @@ static void rd_use_partition(VP10_COMP *cpi, int mi_row, int mi_col, BLOCK_SIZE bsize, int *rate, int64_t *dist, +#if CONFIG_SUPERTX + int *rate_nocoef, +#endif int do_recon, PC_TREE *pc_tree) { VP10_COMMON *const cm = &cpi->common; TileInfo *const tile_info = &tile_data->tile_info; @@ -1662,6 +2159,11 @@ static void rd_use_partition(VP10_COMP *cpi, BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type; int do_partition_search = 1; PICK_MODE_CONTEXT *ctx = &pc_tree->none; +#if CONFIG_SUPERTX + int last_part_rate_nocoef = INT_MAX; + int 
none_rate_nocoef = INT_MAX; + int chosen_rate_nocoef = INT_MAX; +#endif if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; @@ -1714,8 +2216,11 @@ static void rd_use_partition(VP10_COMP *cpi, mi_row + (mi_step >> 1) < cm->mi_rows && mi_col + (mi_step >> 1) < cm->mi_cols) { pc_tree->partitioning = PARTITION_NONE; - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize, - ctx, INT64_MAX); + rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, +#if CONFIG_SUPERTX + &none_rate_nocoef, +#endif + bsize, ctx, INT64_MAX); pl = partition_plane_context(xd, mi_row, mi_col, bsize); @@ -1723,6 +2228,9 @@ static void rd_use_partition(VP10_COMP *cpi, none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; none_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, none_rdc.rate, none_rdc.dist); +#if CONFIG_SUPERTX + none_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE]; +#endif } restore_context(x, mi_row, mi_col, a, l, sa, sl, @@ -1738,68 +2246,110 @@ static void rd_use_partition(VP10_COMP *cpi, switch (partition) { case PARTITION_NONE: rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, +#if CONFIG_SUPERTX + &last_part_rate_nocoef, +#endif bsize, ctx, INT64_MAX); break; case PARTITION_HORZ: rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, +#if CONFIG_SUPERTX + &last_part_rate_nocoef, +#endif subsize, &pc_tree->horizontal[0], INT64_MAX); if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && mi_row + (mi_step >> 1) < cm->mi_rows) { RD_COST tmp_rdc; +#if CONFIG_SUPERTX + int rt_nocoef = 0; +#endif PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0]; vp10_rd_cost_init(&tmp_rdc); update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); rd_pick_sb_modes(cpi, tile_data, x, mi_row + (mi_step >> 1), mi_col, &tmp_rdc, +#if CONFIG_SUPERTX + &rt_nocoef, +#endif subsize, &pc_tree->horizontal[1], INT64_MAX); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) 
{ vp10_rd_cost_reset(&last_part_rdc); +#if CONFIG_SUPERTX + last_part_rate_nocoef = INT_MAX; +#endif break; } last_part_rdc.rate += tmp_rdc.rate; last_part_rdc.dist += tmp_rdc.dist; last_part_rdc.rdcost += tmp_rdc.rdcost; +#if CONFIG_SUPERTX + last_part_rate_nocoef += rt_nocoef; +#endif } break; case PARTITION_VERT: rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, +#if CONFIG_SUPERTX + &last_part_rate_nocoef, +#endif subsize, &pc_tree->vertical[0], INT64_MAX); if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) { RD_COST tmp_rdc; +#if CONFIG_SUPERTX + int rt_nocoef = 0; +#endif PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0]; vp10_rd_cost_init(&tmp_rdc); update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + (mi_step >> 1), &tmp_rdc, +#if CONFIG_SUPERTX + &rt_nocoef, +#endif subsize, &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { vp10_rd_cost_reset(&last_part_rdc); +#if CONFIG_SUPERTX + last_part_rate_nocoef = INT_MAX; +#endif break; } last_part_rdc.rate += tmp_rdc.rate; last_part_rdc.dist += tmp_rdc.dist; last_part_rdc.rdcost += tmp_rdc.rdcost; +#if CONFIG_SUPERTX + last_part_rate_nocoef += rt_nocoef; +#endif } break; case PARTITION_SPLIT: if (bsize == BLOCK_8X8) { rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, +#if CONFIG_SUPERTX + &last_part_rate_nocoef, +#endif subsize, pc_tree->leaf_split[0], INT64_MAX); break; } last_part_rdc.rate = 0; last_part_rdc.dist = 0; last_part_rdc.rdcost = 0; +#if CONFIG_SUPERTX + last_part_rate_nocoef = 0; +#endif for (i = 0; i < 4; i++) { int x_idx = (i & 1) * (mi_step >> 1); int y_idx = (i >> 1) * (mi_step >> 1); int jj = i >> 1, ii = i & 0x01; RD_COST tmp_rdc; +#if CONFIG_SUPERTX + int rt_nocoef; +#endif if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= 
cm->mi_cols)) continue; @@ -1808,13 +2358,22 @@ static void rd_use_partition(VP10_COMP *cpi, mi_8x8 + jj * bss * mis + ii * bss, tp, mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate, &tmp_rdc.dist, +#if CONFIG_SUPERTX + &rt_nocoef, +#endif i != 3, pc_tree->split[i]); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { vp10_rd_cost_reset(&last_part_rdc); +#if CONFIG_SUPERTX + last_part_rate_nocoef = INT_MAX; +#endif break; } last_part_rdc.rate += tmp_rdc.rate; last_part_rdc.dist += tmp_rdc.dist; +#if CONFIG_SUPERTX + last_part_rate_nocoef += rt_nocoef; +#endif } break; default: @@ -1827,6 +2386,9 @@ static void rd_use_partition(VP10_COMP *cpi, last_part_rdc.rate += cpi->partition_cost[pl][partition]; last_part_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, last_part_rdc.rate, last_part_rdc.dist); +#if CONFIG_SUPERTX + last_part_rate_nocoef += cpi->partition_cost[pl][partition]; +#endif } if (do_partition_search @@ -1840,6 +2402,9 @@ static void rd_use_partition(VP10_COMP *cpi, BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT); chosen_rdc.rate = 0; chosen_rdc.dist = 0; +#if CONFIG_SUPERTX + chosen_rate_nocoef = 0; +#endif restore_context(x, mi_row, mi_col, a, l, sa, sl, #if CONFIG_VAR_TX ta, tl, @@ -1852,6 +2417,9 @@ static void rd_use_partition(VP10_COMP *cpi, int x_idx = (i & 1) * (mi_step >> 1); int y_idx = (i >> 1) * (mi_step >> 1); RD_COST tmp_rdc; +#if CONFIG_SUPERTX + int rt_nocoef = 0; +#endif ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; PARTITION_CONTEXT sl[8], sa[8]; #if CONFIG_VAR_TX @@ -1869,6 +2437,9 @@ static void rd_use_partition(VP10_COMP *cpi, pc_tree->split[i]->partitioning = PARTITION_NONE; rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, &tmp_rdc, +#if CONFIG_SUPERTX + &rt_nocoef, +#endif split_subsize, &pc_tree->split[i]->none, INT64_MAX); restore_context(x, mi_row, mi_col, a, l, sa, sl, @@ -1879,11 +2450,17 @@ static void rd_use_partition(VP10_COMP *cpi, if (tmp_rdc.rate == INT_MAX || 
tmp_rdc.dist == INT64_MAX) { vp10_rd_cost_reset(&chosen_rdc); +#if CONFIG_SUPERTX + chosen_rate_nocoef = INT_MAX; +#endif break; } chosen_rdc.rate += tmp_rdc.rate; chosen_rdc.dist += tmp_rdc.dist; +#if CONFIG_SUPERTX + chosen_rate_nocoef += rt_nocoef; +#endif if (i != 3) encode_sb(cpi, td, tile_info, tp, mi_row + y_idx, mi_col + x_idx, 0, @@ -1892,12 +2469,18 @@ static void rd_use_partition(VP10_COMP *cpi, pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx, split_subsize); chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; +#if CONFIG_SUPERTX + chosen_rate_nocoef += cpi->partition_cost[pl][PARTITION_SPLIT]; +#endif } pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (chosen_rdc.rate < INT_MAX) { chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; chosen_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, chosen_rdc.rate, chosen_rdc.dist); +#if CONFIG_SUPERTX + chosen_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE]; +#endif } } @@ -1907,12 +2490,18 @@ static void rd_use_partition(VP10_COMP *cpi, if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition; chosen_rdc = last_part_rdc; +#if CONFIG_SUPERTX + chosen_rate_nocoef = last_part_rate_nocoef; +#endif } // If none was better set the partitioning to that. 
if (none_rdc.rdcost < chosen_rdc.rdcost) { if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; chosen_rdc = none_rdc; +#if CONFIG_SUPERTX + chosen_rate_nocoef = none_rate_nocoef; +#endif } #if CONFIG_VAR_TX @@ -1938,6 +2527,9 @@ static void rd_use_partition(VP10_COMP *cpi, *rate = chosen_rdc.rate; *dist = chosen_rdc.dist; +#if CONFIG_SUPERTX + *rate_nocoef = chosen_rate_nocoef; +#endif } static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = { @@ -2187,6 +2779,9 @@ static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, RD_COST *rd_cost, +#if CONFIG_SUPERTX + int *rate_nocoef, +#endif int64_t best_rd, PC_TREE *pc_tree) { VP10_COMMON *const cm = &cpi->common; TileInfo *const tile_info = &tile_data->tile_info; @@ -2203,6 +2798,13 @@ static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td, int i, pl; BLOCK_SIZE subsize; RD_COST this_rdc, sum_rdc, best_rdc; +#if CONFIG_SUPERTX + int this_rate_nocoef, sum_rate_nocoef = 0, best_rate_nocoef = INT_MAX; + int tmp_rate; + int abort_flag; + int64_t tmp_dist, tmp_rd; + PARTITION_TYPE best_partition; +#endif // CONFIG_SUPERTX int do_split = bsize >= BLOCK_8X8; int do_rect = 1; @@ -2332,14 +2934,20 @@ static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td, // PARTITION_NONE if (partition_none_allowed) { - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, - &this_rdc, bsize, ctx, best_rdc.rdcost); + rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, +#if CONFIG_SUPERTX + &this_rate_nocoef, +#endif + bsize, ctx, best_rdc.rdcost); if (this_rdc.rate != INT_MAX) { if (bsize >= BLOCK_8X8) { pl = partition_plane_context(xd, mi_row, mi_col, bsize); this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist); +#if CONFIG_SUPERTX + this_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE]; +#endif } if (this_rdc.rdcost < best_rdc.rdcost) 
{ @@ -2347,6 +2955,10 @@ static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td, int rate_breakout_thr = cpi->sf.partition_search_breakout_rate_thr; best_rdc = this_rdc; +#if CONFIG_SUPERTX + best_rate_nocoef = this_rate_nocoef; + assert(best_rate_nocoef >= 0); +#endif if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; @@ -2421,7 +3033,7 @@ static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td, restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize); #else restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); -#endif +#endif // CONFIG_VAR_TX } // store estimated motion vector @@ -2438,14 +3050,82 @@ static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td, if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed) pc_tree->leaf_split[0]->pred_interp_filter = ctx->mic.mbmi.interp_filter; +#if CONFIG_SUPERTX + rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, + &sum_rate_nocoef, subsize, pc_tree->leaf_split[0], + INT64_MAX); +#else rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, pc_tree->leaf_split[0], best_rdc.rdcost); - if (sum_rdc.rate == INT_MAX) +#endif // CONFIG_SUPERTX + if (sum_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; +#if CONFIG_SUPERTX + sum_rate_nocoef = INT_MAX; +#endif + } +#if CONFIG_SUPERTX + if (!frame_is_intra_only(cm) && sum_rdc.rdcost < INT64_MAX && + !xd->lossless[0]) { + TX_SIZE supertx_size = max_txsize_lookup[bsize]; + best_partition = pc_tree->partitioning; + pc_tree->partitioning = PARTITION_SPLIT; + + sum_rdc.rate += vp10_cost_bit( + cm->fc->supertx_prob + [partition_supertx_context_lookup[PARTITION_SPLIT]][supertx_size], + 0); + sum_rdc.rdcost = + RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + if (is_inter_mode(pc_tree->leaf_split[0]->mic.mbmi.mode)) { +#if CONFIG_EXT_TX + TX_TYPE best_tx = DCT_DCT; +#endif + + tmp_rate = sum_rate_nocoef; + tmp_dist = 0; +#if CONFIG_VAR_TX + xd->above_txfm_context = cm->above_txfm_context + mi_col; + 
xd->left_txfm_context = + xd->left_txfm_context_buffer + (mi_row & 0x07); + restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize); +#else + restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); +#endif // CONFIG_VAR_TX + rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize, + &tmp_rate, &tmp_dist, +#if CONFIG_EXT_TX + &best_tx, +#endif + pc_tree); + + tmp_rate += vp10_cost_bit( + cm->fc->supertx_prob + [partition_supertx_context_lookup[PARTITION_SPLIT]][supertx_size], + 1); + tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rate, tmp_dist); + if (tmp_rd < sum_rdc.rdcost) { + sum_rdc.rdcost = tmp_rd; + sum_rdc.rate = tmp_rate; + sum_rdc.dist = tmp_dist; + update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree); + } + } + pc_tree->partitioning = best_partition; + } +#endif // CONFIG_SUPERTX } else { +#if CONFIG_SUPERTX + for (i = 0; i < 4 && sum_rdc.rdcost < INT64_MAX; ++i) { +#else for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) { - const int x_idx = (i & 1) * mi_step; - const int y_idx = (i >> 1) * mi_step; +#endif // CONFIG_SUPERTX + const int x_idx = (i & 1) * mi_step; + const int y_idx = (i >> 1) * mi_step; if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) continue; @@ -2454,20 +3134,90 @@ static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td, load_pred_mv(x, ctx); pc_tree->split[i]->index = i; +#if CONFIG_SUPERTX + rd_pick_partition(cpi, td, tile_data, tp, + mi_row + y_idx, mi_col + x_idx, + subsize, &this_rdc, &this_rate_nocoef, + INT64_MAX - sum_rdc.rdcost, pc_tree->split[i]); +#else rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize, &this_rdc, best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]); +#endif // CONFIG_SUPERTX if (this_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; +#if CONFIG_SUPERTX + sum_rate_nocoef = INT_MAX; +#endif // CONFIG_SUPERTX break; } else { sum_rdc.rate += this_rdc.rate; sum_rdc.dist += 
this_rdc.dist; sum_rdc.rdcost += this_rdc.rdcost; +#if CONFIG_SUPERTX + sum_rate_nocoef += this_rate_nocoef; +#endif // CONFIG_SUPERTX } } +#if CONFIG_SUPERTX + if (!frame_is_intra_only(cm) && + sum_rdc.rdcost < INT64_MAX && + i == 4 && bsize <= MAX_SUPERTX_BLOCK_SIZE && + !xd->lossless[0]) { + TX_SIZE supertx_size = max_txsize_lookup[bsize]; + best_partition = pc_tree->partitioning; + pc_tree->partitioning = PARTITION_SPLIT; + + sum_rdc.rate += vp10_cost_bit( + cm->fc->supertx_prob + [partition_supertx_context_lookup[PARTITION_SPLIT]][supertx_size], + 0); + sum_rdc.rdcost = + RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + + if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) { +#if CONFIG_EXT_TX + TX_TYPE best_tx = DCT_DCT; +#endif + + tmp_rate = sum_rate_nocoef; + tmp_dist = 0; +#if CONFIG_VAR_TX + xd->above_txfm_context = cm->above_txfm_context + mi_col; + xd->left_txfm_context = + xd->left_txfm_context_buffer + (mi_row & 0x07); + restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize); +#else + restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); +#endif // CONFIG_VAR_TX + rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize, + &tmp_rate, &tmp_dist, +#if CONFIG_EXT_TX + &best_tx, +#endif + pc_tree); + + tmp_rate += vp10_cost_bit( + cm->fc->supertx_prob + [partition_supertx_context_lookup[PARTITION_SPLIT]][supertx_size], + 1); + tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rate, tmp_dist); + if (tmp_rd < sum_rdc.rdcost) { + sum_rdc.rdcost = tmp_rd; + sum_rdc.rate = tmp_rate; + sum_rdc.dist = tmp_dist; + update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree); + } + } + pc_tree->partitioning = best_partition; + } +#endif // CONFIG_SUPERTX } if (sum_rdc.rdcost < best_rdc.rdcost && i == 4) { @@ -2475,9 +3225,16 @@ static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td, sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; sum_rdc.rdcost = 
RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); +#if CONFIG_SUPERTX + sum_rate_nocoef += cpi->partition_cost[pl][PARTITION_SPLIT]; +#endif // CONFIG_SUPERTX if (sum_rdc.rdcost < best_rdc.rdcost) { best_rdc = sum_rdc; +#if CONFIG_SUPERTX + best_rate_nocoef = sum_rate_nocoef; + assert(best_rate_nocoef >= 0); +#endif // CONFIG_SUPERTX pc_tree->partitioning = PARTITION_SPLIT; } } else { @@ -2493,22 +3250,32 @@ static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td, #else restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); #endif - } + } // if (do_split) // PARTITION_HORZ if (partition_horz_allowed && (do_rect || vp10_active_h_edge(cpi, mi_row, mi_step))) { - subsize = get_subsize(bsize, PARTITION_HORZ); + subsize = get_subsize(bsize, PARTITION_HORZ); if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) pc_tree->horizontal[0].pred_interp_filter = ctx->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, - &pc_tree->horizontal[0], best_rdc.rdcost); - - if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows && + rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, +#if CONFIG_SUPERTX + &sum_rate_nocoef, +#endif // CONFIG_SUPERTX + subsize, &pc_tree->horizontal[0], best_rdc.rdcost); + +#if CONFIG_SUPERTX + abort_flag = (sum_rdc.rdcost >= best_rd && bsize > BLOCK_8X8) || + (sum_rdc.rate == INT_MAX && bsize == BLOCK_8X8); + if (sum_rdc.rdcost < INT64_MAX && +#else + if (sum_rdc.rdcost < best_rdc.rdcost && +#endif // CONFIG_SUPERTX + mi_row + mi_step < cm->mi_rows && bsize > BLOCK_8X8) { PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0]; update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); @@ -2520,24 +3287,98 @@ static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td, partition_none_allowed) pc_tree->horizontal[1].pred_interp_filter = ctx->mic.mbmi.interp_filter; +#if CONFIG_SUPERTX + 
rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, + &this_rdc, &this_rate_nocoef, + subsize, &pc_tree->horizontal[1], + INT64_MAX); +#else rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc, subsize, &pc_tree->horizontal[1], best_rdc.rdcost - sum_rdc.rdcost); +#endif // CONFIG_SUPERTX if (this_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; +#if CONFIG_SUPERTX + sum_rate_nocoef = INT_MAX; +#endif // CONFIG_SUPERTX } else { sum_rdc.rate += this_rdc.rate; sum_rdc.dist += this_rdc.dist; sum_rdc.rdcost += this_rdc.rdcost; +#if CONFIG_SUPERTX + sum_rate_nocoef += this_rate_nocoef; +#endif // CONFIG_SUPERTX } } +#if CONFIG_SUPERTX + if (!frame_is_intra_only(cm) && !abort_flag && + sum_rdc.rdcost < INT64_MAX && bsize <= MAX_SUPERTX_BLOCK_SIZE && + !xd->lossless[0]) { + TX_SIZE supertx_size = max_txsize_lookup[bsize]; + best_partition = pc_tree->partitioning; + pc_tree->partitioning = PARTITION_HORZ; + + sum_rdc.rate += vp10_cost_bit( + cm->fc->supertx_prob[partition_supertx_context_lookup[PARTITION_HORZ]] + [supertx_size], 0); + sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + + if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) { +#if CONFIG_EXT_TX + TX_TYPE best_tx = DCT_DCT; +#endif + + tmp_rate = sum_rate_nocoef; + tmp_dist = 0; +#if CONFIG_VAR_TX + xd->above_txfm_context = cm->above_txfm_context + mi_col; + xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07); + restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize); +#else + restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); +#endif // CONFIG_VAR_TX + rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize, + &tmp_rate, &tmp_dist, +#if CONFIG_EXT_TX + &best_tx, +#endif + pc_tree); + + tmp_rate += vp10_cost_bit( + cm->fc->supertx_prob + [partition_supertx_context_lookup[PARTITION_HORZ]][supertx_size], + 1); + tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rate, tmp_dist); + if (tmp_rd < sum_rdc.rdcost) { + 
sum_rdc.rdcost = tmp_rd; + sum_rdc.rate = tmp_rate; + sum_rdc.dist = tmp_dist; + update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree); + } + } + pc_tree->partitioning = best_partition; + } +#endif // CONFIG_SUPERTX + if (sum_rdc.rdcost < best_rdc.rdcost) { pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ]; sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); +#if CONFIG_SUPERTX + sum_rate_nocoef += cpi->partition_cost[pl][PARTITION_HORZ]; +#endif // CONFIG_SUPERTX if (sum_rdc.rdcost < best_rdc.rdcost) { best_rdc = sum_rdc; +#if CONFIG_SUPERTX + best_rate_nocoef = sum_rate_nocoef; + assert(best_rate_nocoef >= 0); +#endif // CONFIG_SUPERTX pc_tree->partitioning = PARTITION_HORZ; } } @@ -2552,7 +3393,7 @@ static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td, // PARTITION_VERT if (partition_vert_allowed && (do_rect || vp10_active_v_edge(cpi, mi_col, mi_step))) { - subsize = get_subsize(bsize, PARTITION_VERT); + subsize = get_subsize(bsize, PARTITION_VERT); if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); @@ -2560,9 +3401,19 @@ static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td, partition_none_allowed) pc_tree->vertical[0].pred_interp_filter = ctx->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, - &pc_tree->vertical[0], best_rdc.rdcost); - if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols && + rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, +#if CONFIG_SUPERTX + &sum_rate_nocoef, +#endif // CONFIG_SUPERTX + subsize, &pc_tree->vertical[0], best_rdc.rdcost); +#if CONFIG_SUPERTX + abort_flag = (sum_rdc.rdcost >= best_rd && bsize > BLOCK_8X8) || + (sum_rdc.rate == INT_MAX && bsize == BLOCK_8X8); + if (sum_rdc.rdcost < INT64_MAX && +#else + if (sum_rdc.rdcost < best_rdc.rdcost && +#endif // CONFIG_SUPERTX + mi_col + 
mi_step < cm->mi_cols && bsize > BLOCK_8X8) { update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0); encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, @@ -2574,25 +3425,96 @@ static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td, partition_none_allowed) pc_tree->vertical[1].pred_interp_filter = ctx->mic.mbmi.interp_filter; +#if CONFIG_SUPERTX + rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc, + &this_rate_nocoef, subsize, &pc_tree->vertical[1], + INT64_MAX - sum_rdc.rdcost); +#else rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc, subsize, &pc_tree->vertical[1], best_rdc.rdcost - sum_rdc.rdcost); +#endif // CONFIG_SUPERTX if (this_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; +#if CONFIG_SUPERTX + sum_rate_nocoef = INT_MAX; +#endif // CONFIG_SUPERTX } else { sum_rdc.rate += this_rdc.rate; sum_rdc.dist += this_rdc.dist; sum_rdc.rdcost += this_rdc.rdcost; +#if CONFIG_SUPERTX + sum_rate_nocoef += this_rate_nocoef; +#endif // CONFIG_SUPERTX } } +#if CONFIG_SUPERTX + if (!frame_is_intra_only(cm) && !abort_flag && + sum_rdc.rdcost < INT64_MAX && bsize <= MAX_SUPERTX_BLOCK_SIZE && + !xd->lossless[0]) { + TX_SIZE supertx_size = max_txsize_lookup[bsize]; + best_partition = pc_tree->partitioning; + pc_tree->partitioning = PARTITION_VERT; + sum_rdc.rate += vp10_cost_bit( + cm->fc->supertx_prob[partition_supertx_context_lookup[PARTITION_VERT]] + [supertx_size], 0); + sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + + if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) { +#if CONFIG_EXT_TX + TX_TYPE best_tx = DCT_DCT; +#endif + + tmp_rate = sum_rate_nocoef; + tmp_dist = 0; +#if CONFIG_VAR_TX + xd->above_txfm_context = cm->above_txfm_context + mi_col; + xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & 0x07); + restore_context(x, mi_row, mi_col, a, l, sa, sl, ta, tl, bsize); +#else + restore_context(x, mi_row, mi_col, a, l, sa, sl, 
bsize); +#endif // CONFIG_VAR_TX + rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize, + &tmp_rate, &tmp_dist, +#if CONFIG_EXT_TX + &best_tx, +#endif + pc_tree); + + tmp_rate += vp10_cost_bit( + cm->fc->supertx_prob + [partition_supertx_context_lookup[PARTITION_VERT]][supertx_size], + 1); + tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rate, tmp_dist); + if (tmp_rd < sum_rdc.rdcost) { + sum_rdc.rdcost = tmp_rd; + sum_rdc.rate = tmp_rate; + sum_rdc.dist = tmp_dist; + update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, +#if CONFIG_EXT_TX + best_tx, +#endif + supertx_size, pc_tree); + } + } + pc_tree->partitioning = best_partition; + } +#endif // CONFIG_SUPERTX if (sum_rdc.rdcost < best_rdc.rdcost) { pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT]; sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); +#if CONFIG_SUPERTX + sum_rate_nocoef += cpi->partition_cost[pl][PARTITION_VERT]; +#endif // CONFIG_SUPERTX if (sum_rdc.rdcost < best_rdc.rdcost) { best_rdc = sum_rdc; +#if CONFIG_SUPERTX + best_rate_nocoef = sum_rate_nocoef; + assert(best_rate_nocoef >= 0); +#endif // CONFIG_SUPERTX pc_tree->partitioning = PARTITION_VERT; } } @@ -2611,6 +3533,9 @@ static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td, // checks occur in some sub function and thus are used... 
(void) best_rd; *rd_cost = best_rdc; +#if CONFIG_SUPERTX + *rate_nocoef = best_rate_nocoef; +#endif // CONFIG_SUPERTX if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && pc_tree->index != 3) { @@ -2654,6 +3579,9 @@ static void encode_rd_sb_row(VP10_COMP *cpi, int dummy_rate; int64_t dummy_dist; RD_COST dummy_rdc; +#if CONFIG_SUPERTX + int dummy_rate_nocoef; +#endif // CONFIG_SUPERTX int i; int seg_skip = 0; @@ -2689,19 +3617,31 @@ static void encode_rd_sb_row(VP10_COMP *cpi, set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, - BLOCK_64X64, &dummy_rate, &dummy_dist, 1, td->pc_root); + BLOCK_64X64, &dummy_rate, &dummy_dist, +#if CONFIG_SUPERTX + &dummy_rate_nocoef, +#endif // CONFIG_SUPERTX + 1, td->pc_root); } else if (cpi->partition_search_skippable_frame) { BLOCK_SIZE bsize; set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col); set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, - BLOCK_64X64, &dummy_rate, &dummy_dist, 1, td->pc_root); + BLOCK_64X64, &dummy_rate, &dummy_dist, +#if CONFIG_SUPERTX + &dummy_rate_nocoef, +#endif // CONFIG_SUPERTX + 1, td->pc_root); } else if (sf->partition_search_type == VAR_BASED_PARTITION && cm->frame_type != KEY_FRAME) { choose_partitioning(cpi, tile_info, x, mi_row, mi_col); rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, - BLOCK_64X64, &dummy_rate, &dummy_dist, 1, td->pc_root); + BLOCK_64X64, &dummy_rate, &dummy_dist, +#if CONFIG_SUPERTX + &dummy_rate_nocoef, +#endif // CONFIG_SUPERTX + 1, td->pc_root); } else { // If required set upper and lower partition size limits if (sf->auto_min_max_partition_size) { @@ -2711,7 +3651,11 @@ static void encode_rd_sb_row(VP10_COMP *cpi, &x->max_partition_size); } rd_pick_partition(cpi, td, 
tile_data, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rdc, INT64_MAX, td->pc_root); + &dummy_rdc, +#if CONFIG_SUPERTX + &dummy_rate_nocoef, +#endif // CONFIG_SUPERTX + INT64_MAX, td->pc_root); } } } @@ -3105,16 +4049,31 @@ void vp10_encode_frame(VP10_COMP *cpi) { count32x32 += counts->tx.p32x32[i][TX_32X32]; } if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 && +#if CONFIG_SUPERTX + cm->counts.supertx_size[TX_16X16] == 0 && + cm->counts.supertx_size[TX_32X32] == 0 && +#endif // CONFIG_SUPERTX count32x32 == 0) { cm->tx_mode = ALLOW_8X8; reset_skip_tx_size(cm, TX_8X8); } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 && - count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) { + count8x8_lp == 0 && count16x16_lp == 0 && +#if CONFIG_SUPERTX + cm->counts.supertx_size[TX_8X8] == 0 && + cm->counts.supertx_size[TX_16X16] == 0 && + cm->counts.supertx_size[TX_32X32] == 0 && +#endif // CONFIG_SUPERTX + count32x32 == 0) { cm->tx_mode = ONLY_4X4; reset_skip_tx_size(cm, TX_4X4); - } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) { + } else if (count8x8_lp == 0 && count16x16_lp == 0 && + count4x4 == 0) { cm->tx_mode = ALLOW_32X32; - } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) { + } else if (count32x32 == 0 && count8x8_lp == 0 && +#if CONFIG_SUPERTX + cm->counts.supertx_size[TX_32X32] == 0 && +#endif // CONFIG_SUPERTX + count4x4 == 0) { cm->tx_mode = ALLOW_16X16; reset_skip_tx_size(cm, TX_16X16); } @@ -3444,3 +4403,790 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td, } #endif } + +#if CONFIG_SUPERTX +static int check_intra_b(PICK_MODE_CONTEXT *ctx) { + return !is_inter_mode((&ctx->mic)->mbmi.mode); +} + +static int check_intra_sb(VP10_COMP *cpi, const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize, + PC_TREE *pc_tree) { + VP10_COMMON *const cm = &cpi->common; + + const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; + PARTITION_TYPE partition; + BLOCK_SIZE subsize = 
bsize; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return 1; + + if (bsize >= BLOCK_8X8) + subsize = get_subsize(bsize, pc_tree->partitioning); + else + subsize = BLOCK_4X4; + + partition = partition_lookup[bsl][subsize]; + + switch (partition) { + case PARTITION_NONE: + return check_intra_b(&pc_tree->none); + break; + case PARTITION_VERT: + if (check_intra_b(&pc_tree->vertical[0])) + return 1; + if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { + if (check_intra_b(&pc_tree->vertical[1])) + return 1; + } + break; + case PARTITION_HORZ: + if (check_intra_b(&pc_tree->horizontal[0])) + return 1; + if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { + if (check_intra_b(&pc_tree->horizontal[1])) + return 1; + } + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) { + if (check_intra_b(pc_tree->leaf_split[0])) + return 1; + } else { + if (check_intra_sb(cpi, tile, mi_row, mi_col, subsize, + pc_tree->split[0])) + return 1; + if (check_intra_sb(cpi, tile, mi_row, mi_col + hbs, subsize, + pc_tree->split[1])) + return 1; + if (check_intra_sb(cpi, tile, mi_row + hbs, mi_col, subsize, + pc_tree->split[2])) + return 1; + if (check_intra_sb(cpi, tile, mi_row + hbs, mi_col + hbs, subsize, + pc_tree->split[3])) + return 1; + } + break; + default: + assert(0); + } + return 0; +} + +static int check_supertx_b(TX_SIZE supertx_size, PICK_MODE_CONTEXT *ctx) { + return ctx->mic.mbmi.tx_size == supertx_size; +} + +static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size, + PC_TREE *pc_tree) { + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + + partition = pc_tree->partitioning; + subsize = get_subsize(bsize, partition); + switch (partition) { + case PARTITION_NONE: + return check_supertx_b(supertx_size, &pc_tree->none); + case PARTITION_VERT: + return check_supertx_b(supertx_size, &pc_tree->vertical[0]); + case PARTITION_HORZ: + return check_supertx_b(supertx_size, &pc_tree->horizontal[0]); + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) + return 
check_supertx_b(supertx_size, pc_tree->leaf_split[0]); + else + return check_supertx_sb(subsize, supertx_size, pc_tree->split[0]); + default: + assert(0); + return 0; + } +} + +static void predict_superblock(VP10_COMP *cpi, ThreadData *td, + int mi_row_pred, int mi_col_pred, + BLOCK_SIZE bsize_pred, int b_sub8x8, int block) { + // Used in supertx + // (mi_row_ori, mi_col_ori): location for mv + // (mi_row_pred, mi_col_pred, bsize_pred): region to predict + VP10_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO *mi_8x8 = xd->mi[0]; + MODE_INFO *mi = mi_8x8; + MB_MODE_INFO *mbmi = &mi->mbmi; + int ref; + const int is_compound = has_second_ref(mbmi); + + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); + + for (ref = 0; ref < 1 + is_compound; ++ref) { + YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, + mbmi->ref_frame[ref]); + vp10_setup_pre_planes(xd, ref, cfg, mi_row_pred, mi_col_pred, + &xd->block_refs[ref]->sf); + } + + if (!b_sub8x8) + vp10_build_inter_predictors_sb(xd, mi_row_pred, mi_col_pred, bsize_pred); + else + vp10_build_inter_predictors_sb_sub8x8(xd, mi_row_pred, mi_col_pred, + bsize_pred, block); +} + +static void predict_b_extend(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int block, + int mi_row_ori, int mi_col_ori, + int mi_row_pred, int mi_col_pred, + int mi_row_top, int mi_col_top, + uint8_t * dst_buf[3], int dst_stride[3], + BLOCK_SIZE bsize_ori, BLOCK_SIZE bsize_top, + BLOCK_SIZE bsize_pred, int output_enabled, + int b_sub8x8, int bextend) { + // Used in supertx + // (mi_row_ori, mi_col_ori): location for mv + // (mi_row_pred, mi_col_pred, bsize_pred): region to predict + // (mi_row_top, mi_col_top, bsize_top): region of the top partition size + // block: sub location of sub8x8 blocks + // b_sub8x8: 1: ori is sub8x8; 0: ori is not sub8x8 + // bextend: 1: region to predict is an extension of ori; 0: not + + MACROBLOCK *const x = &td->mb; + 
VP10_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + int r = (mi_row_pred - mi_row_top) * MI_SIZE; + int c = (mi_col_pred - mi_col_top) * MI_SIZE; + const int mi_width_top = num_8x8_blocks_wide_lookup[bsize_top]; + const int mi_height_top = num_8x8_blocks_high_lookup[bsize_top]; + + if (mi_row_pred < mi_row_top || mi_col_pred < mi_col_top || + mi_row_pred >= mi_row_top + mi_height_top || + mi_col_pred >= mi_col_top + mi_width_top || + mi_row_pred >= cm->mi_rows || mi_col_pred >= cm->mi_cols) + return; + + set_offsets_extend(cpi, td, tile, mi_row_pred, mi_col_pred, + mi_row_ori, mi_col_ori, bsize_pred, bsize_ori); + xd->plane[0].dst.stride = dst_stride[0]; + xd->plane[1].dst.stride = dst_stride[1]; + xd->plane[2].dst.stride = dst_stride[2]; + xd->plane[0].dst.buf = dst_buf[0] + + (r >> xd->plane[0].subsampling_y) * dst_stride[0] + + (c >> xd->plane[0].subsampling_x); + xd->plane[1].dst.buf = dst_buf[1] + + (r >> xd->plane[1].subsampling_y) * dst_stride[1] + + (c >> xd->plane[1].subsampling_x); + xd->plane[2].dst.buf = dst_buf[2] + + (r >> xd->plane[2].subsampling_y) * dst_stride[2] + + (c >> xd->plane[2].subsampling_x); + + predict_superblock(cpi, td, + mi_row_pred, mi_col_pred, bsize_pred, + b_sub8x8, block); + + if (output_enabled && !bextend) + update_stats(&cpi->common, td, 1); +} + +static void extend_dir(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int block, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, + int mi_row, int mi_col, + int mi_row_top, int mi_col_top, + int output_enabled, + uint8_t * dst_buf[3], int dst_stride[3], int dir) { + // dir: 0-lower, 1-upper, 2-left, 3-right + // 4-lowerleft, 5-upperleft, 6-lowerright, 7-upperright + MACROBLOCKD *xd = &td->mb.e_mbd; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + int xss = xd->plane[1].subsampling_x; + int yss = xd->plane[1].subsampling_y; + int b_sub8x8 = (bsize < BLOCK_8X8) ? 
1 : 0; + + BLOCK_SIZE extend_bsize; + int unit, mi_row_pred, mi_col_pred; + + if (dir == 0 || dir == 1) { // lower and upper + extend_bsize = (mi_width == 1 || bsize < BLOCK_8X8 || xss < yss) ? + BLOCK_8X8 : BLOCK_16X8; + unit = num_8x8_blocks_wide_lookup[extend_bsize]; + mi_row_pred = mi_row + ((dir == 0) ? mi_height : -1); + mi_col_pred = mi_col; + + predict_b_extend(cpi, td, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, + mi_row_top, mi_col_top, dst_buf, dst_stride, + bsize, top_bsize, extend_bsize, + output_enabled, b_sub8x8, 1); + + if (mi_width > unit) { + int i; + for (i = 0; i < mi_width/unit - 1; i++) { + mi_col_pred += unit; + predict_b_extend(cpi, td, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, mi_row_top, mi_col_top, + dst_buf, dst_stride, bsize, top_bsize, extend_bsize, + output_enabled, b_sub8x8, 1); + } + } + } else if (dir == 2 || dir == 3) { // left and right + extend_bsize = (mi_height == 1 || bsize < BLOCK_8X8 || yss < xss) ? + BLOCK_8X8 : BLOCK_8X16; + unit = num_8x8_blocks_high_lookup[extend_bsize]; + mi_row_pred = mi_row; + mi_col_pred = mi_col + ((dir == 3) ? mi_width : -1); + + predict_b_extend(cpi, td, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, mi_row_top, mi_col_top, + dst_buf, dst_stride, bsize, top_bsize, extend_bsize, + output_enabled, b_sub8x8, 1); + + if (mi_height > unit) { + int i; + for (i = 0; i < mi_height/unit - 1; i++) { + mi_row_pred += unit; + predict_b_extend(cpi, td, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, mi_row_top, mi_col_top, + dst_buf, dst_stride, bsize, top_bsize, extend_bsize, + output_enabled, b_sub8x8, 1); + } + } + } else { + extend_bsize = BLOCK_8X8; + mi_row_pred = mi_row + ((dir == 4 || dir == 6) ? mi_height : -1); + mi_col_pred = mi_col + ((dir == 6 || dir == 7) ? 
mi_width : -1); + + predict_b_extend(cpi, td, tile, block, mi_row, mi_col, + mi_row_pred, mi_col_pred, mi_row_top, mi_col_top, + dst_buf, dst_stride, bsize, top_bsize, extend_bsize, + output_enabled, b_sub8x8, 1); + } +} + +static void extend_all(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int block, + BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, + int mi_row, int mi_col, + int mi_row_top, int mi_col_top, + int output_enabled, + uint8_t * dst_buf[3], int dst_stride[3]) { + assert(block >= 0 && block < 4); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 0); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 1); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 2); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 3); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 4); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 5); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 6); + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride, 7); +} + + +// This function generates prediction for multiple blocks, between which +// discontinuity around boundary is reduced by smoothing masks. The basic +// smoothing mask is a soft step function along horz/vert direction. 
In more +// complicated case when a block is split into 4 subblocks, the basic mask is +// first applied to neighboring subblocks (2 pairs) in horizontal direction and +// then applied to the 2 masked predictions mentioned above in vertical direction. +// If the block is split into more than one level, at every stage, masked +// prediction is stored in dst_buf[] passed from higher level. +static void predict_sb_complex(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int mi_row, int mi_col, + int mi_row_top, int mi_col_top, + int output_enabled, BLOCK_SIZE bsize, + BLOCK_SIZE top_bsize, + uint8_t *dst_buf[3], int dst_stride[3], + PC_TREE *pc_tree) { + VP10_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; + + const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + + int i, ctx; + uint8_t *dst_buf1[3], *dst_buf2[3], *dst_buf3[3]; + DECLARE_ALIGNED(16, uint8_t, + tmp_buf1[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); + DECLARE_ALIGNED(16, uint8_t, + tmp_buf2[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); + DECLARE_ALIGNED(16, uint8_t, + tmp_buf3[MAX_MB_PLANE * MAXTXLEN * MAXTXLEN * 2]); + int dst_stride1[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; + int dst_stride2[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; + int dst_stride3[3] = {MAXTXLEN, MAXTXLEN, MAXTXLEN}; +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + int len = sizeof(uint16_t); + dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); + dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAXTXLEN * MAXTXLEN * len); + dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN * len); + dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); + dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAXTXLEN * MAXTXLEN * len); + dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN * len); + dst_buf3[0] = CONVERT_TO_BYTEPTR(tmp_buf3); + dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAXTXLEN * 
MAXTXLEN * len); + dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN * len); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + dst_buf1[0] = tmp_buf1; + dst_buf1[1] = tmp_buf1 + MAXTXLEN * MAXTXLEN; + dst_buf1[2] = tmp_buf1 + 2 * MAXTXLEN * MAXTXLEN; + dst_buf2[0] = tmp_buf2; + dst_buf2[1] = tmp_buf2 + MAXTXLEN * MAXTXLEN; + dst_buf2[2] = tmp_buf2 + 2 * MAXTXLEN * MAXTXLEN; + dst_buf3[0] = tmp_buf3; + dst_buf3[1] = tmp_buf3 + MAXTXLEN * MAXTXLEN; + dst_buf3[2] = tmp_buf3 + 2 * MAXTXLEN * MAXTXLEN; +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + if (bsize >= BLOCK_8X8) { + ctx = partition_plane_context(xd, mi_row, mi_col, bsize); + subsize = get_subsize(bsize, pc_tree->partitioning); + } else { + ctx = 0; + subsize = BLOCK_4X4; + } + partition = partition_lookup[bsl][subsize]; + if (output_enabled && bsize != BLOCK_4X4 && bsize < top_bsize) + cm->counts.partition[ctx][partition]++; + + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + } + + switch (partition) { + case PARTITION_NONE: + assert(bsize < top_bsize); + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + bsize, top_bsize, bsize, output_enabled, 0, 0); + extend_all(cpi, td, tile, 0, bsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride); + break; + case PARTITION_HORZ: + if (bsize == BLOCK_8X8) { + // First half + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf, dst_stride); + + // Second half + predict_b_extend(cpi, td, tile, 2, mi_row, mi_col, mi_row, mi_col, + 
mi_row_top, mi_col_top, dst_buf1, dst_stride1, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 1); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 2, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf1, dst_stride1); + + // Smooth + xd->plane[0].dst.buf = dst_buf[0]; + xd->plane[0].dst.stride = dst_stride[0]; + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[0], dst_stride[0], + dst_buf1[0], dst_stride1[0], + &xd->plane[0], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_HORZ, 0); + } else { + // First half + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + subsize, top_bsize, subsize, output_enabled, 0, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf, dst_stride); + else + extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf, dst_stride, 0); + + if (mi_row + hbs < cm->mi_rows) { + // Second half + predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col, + mi_row + hbs, mi_col, mi_row_top, mi_col_top, + dst_buf1, dst_stride1, subsize, top_bsize, subsize, + output_enabled, 0, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs, + mi_col, mi_row_top, mi_col_top, output_enabled, + dst_buf1, dst_stride1); + else + extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs, + mi_col, mi_row_top, mi_col_top, output_enabled, + dst_buf1, dst_stride1, 1); + + // Smooth + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + vp10_build_masked_inter_predictor_complex( + xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], + &xd->plane[i], mi_row, mi_col, mi_row_top, mi_col_top, + bsize, top_bsize, PARTITION_HORZ, i); + } + } + } + break; + case 
PARTITION_VERT: + if (bsize == BLOCK_8X8) { + // First half + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf, dst_stride); + + // Second half + predict_b_extend(cpi, td, tile, 1, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf1, dst_stride1, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 1); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 1, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf1, dst_stride1); + + // Smooth + xd->plane[0].dst.buf = dst_buf[0]; + xd->plane[0].dst.stride = dst_stride[0]; + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[0], dst_stride[0], + dst_buf1[0], dst_stride1[0], + &xd->plane[0], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_VERT, 0); + } else { + // bsize: not important, not useful + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + subsize, top_bsize, subsize, output_enabled, 0, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf, dst_stride); + else + extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf, dst_stride, 3); + + + if (mi_col + hbs < cm->mi_cols) { + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + hbs, + mi_row, mi_col + hbs, mi_row_top, mi_col_top, + dst_buf1, dst_stride1, subsize, top_bsize, subsize, + output_enabled, 0, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, + mi_col + hbs, mi_row_top, mi_col_top, output_enabled, + dst_buf1, dst_stride1); + else + extend_dir(cpi, td, tile, 0, 
subsize, top_bsize, mi_row, + mi_col + hbs, mi_row_top, mi_col_top, output_enabled, + dst_buf1, dst_stride1, 2); + + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + vp10_build_masked_inter_predictor_complex( + xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], + &xd->plane[i], mi_row, mi_col, mi_row_top, mi_col_top, + bsize, top_bsize, PARTITION_VERT, i); + } + } + } + break; + case PARTITION_SPLIT: + if (bsize == BLOCK_8X8) { + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf, dst_stride, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 0); + predict_b_extend(cpi, td, tile, 1, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf1, dst_stride1, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 1); + predict_b_extend(cpi, td, tile, 2, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf2, dst_stride2, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 1); + predict_b_extend(cpi, td, tile, 3, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, dst_buf3, dst_stride3, + subsize, top_bsize, BLOCK_8X8, output_enabled, 1, 1); + + if (bsize < top_bsize) { + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf, dst_stride); + extend_all(cpi, td, tile, 1, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf1, dst_stride1); + extend_all(cpi, td, tile, 2, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf2, dst_stride2); + extend_all(cpi, td, tile, 3, subsize, top_bsize, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, + dst_buf3, dst_stride3); + } + } else { + predict_sb_complex(cpi, td, tile, mi_row, mi_col, + mi_row_top, mi_col_top, output_enabled, subsize, + top_bsize, dst_buf, dst_stride, + pc_tree->split[0]); + if (mi_row < cm->mi_rows && mi_col + hbs < 
cm->mi_cols) + predict_sb_complex(cpi, td, tile, mi_row, mi_col + hbs, + mi_row_top, mi_col_top, output_enabled, subsize, + top_bsize, dst_buf1, dst_stride1, + pc_tree->split[1]); + if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) + predict_sb_complex(cpi, td, tile, mi_row + hbs, mi_col, + mi_row_top, mi_col_top, output_enabled, subsize, + top_bsize, dst_buf2, dst_stride2, + pc_tree->split[2]); + if (mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) + predict_sb_complex(cpi, td, tile, mi_row + hbs, mi_col + hbs, + mi_row_top, mi_col_top, output_enabled, subsize, + top_bsize, dst_buf3, dst_stride3, + pc_tree->split[3]); + } + for (i = 0; i < MAX_MB_PLANE; i++) { + if (bsize == BLOCK_8X8 && i != 0) + continue; // Skip <4x4 chroma smoothing + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) { + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[i], + dst_stride[i], + dst_buf1[i], + dst_stride1[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_VERT, i); + if (mi_row + hbs < cm->mi_rows) { + vp10_build_masked_inter_predictor_complex(xd, + dst_buf2[i], + dst_stride2[i], + dst_buf3[i], + dst_stride3[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_VERT, i); + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[i], + dst_stride[i], + dst_buf2[i], + dst_stride2[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_HORZ, i); + } + } else if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) { + vp10_build_masked_inter_predictor_complex(xd, + dst_buf[i], + dst_stride[i], + dst_buf2[i], + dst_stride2[i], + &xd->plane[i], + mi_row, mi_col, + mi_row_top, mi_col_top, + bsize, top_bsize, + PARTITION_HORZ, i); + } + } + break; + default: + assert(0); + } + + + if (bsize < top_bsize && (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)) + update_partition_context(xd, mi_row, mi_col, subsize, bsize); +} + +static 
void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td, + const TileInfo *const tile, + int mi_row, int mi_col, BLOCK_SIZE bsize, + int *tmp_rate, int64_t *tmp_dist, +#if CONFIG_EXT_TX + TX_TYPE *best_tx, +#endif // CONFIG_EXT_TX + PC_TREE *pc_tree) { + VP10_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; + int plane, pnskip, skippable, skippable_uv, rate_uv, this_rate, + base_rate = *tmp_rate; + int64_t sse, pnsse, sse_uv, this_dist, dist_uv; + uint8_t *dst_buf[3]; + int dst_stride[3]; + TX_SIZE tx_size; +#if CONFIG_EXT_TX + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + TX_TYPE tx_type, best_tx_nostx = xd->mi[0]->mbmi.tx_type; + int ext_tx_set; + int tmp_rate_tx = 0, skip_tx = 0; + int64_t tmp_dist_tx = 0, rd_tx, bestrd_tx = INT64_MAX; + uint8_t tmp_zcoeff_blk = 0; +#endif // CONFIG_EXT_TX + + update_state_sb_supertx(cpi, td, tile, mi_row, mi_col, bsize, 0, pc_tree); + vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), + mi_row, mi_col); + for (plane = 0; plane < MAX_MB_PLANE; plane++) { + dst_buf[plane] = xd->plane[plane].dst.buf; + dst_stride[plane] = xd->plane[plane].dst.stride; + } + predict_sb_complex(cpi, td, tile, mi_row, mi_col, mi_row, mi_col, + 0, bsize, bsize, dst_buf, dst_stride, pc_tree); + + set_offsets(cpi, tile, x, mi_row, mi_col, bsize); +#if CONFIG_EXT_TX + *best_tx = DCT_DCT; +#endif + + // chroma + skippable_uv = 1; + rate_uv = 0; + dist_uv = 0; + sse_uv = 0; + for (plane = 1; plane < MAX_MB_PLANE; ++plane) { + tx_size = max_txsize_lookup[bsize]; + tx_size = get_uv_tx_size_impl(tx_size, bsize, + cm->subsampling_x, cm->subsampling_y); + vp10_subtract_plane(x, bsize, plane); + vp10_txfm_rd_in_plane_supertx(x, +#if CONFIG_VAR_TX + cpi, +#endif + &this_rate, &this_dist, &pnskip, &pnsse, + INT64_MAX, plane, bsize, tx_size, 0); + rate_uv += this_rate; + dist_uv += this_dist; + sse_uv += pnsse; + skippable_uv &= pnskip; + } + + // luma + tx_size = max_txsize_lookup[bsize]; + vp10_subtract_plane(x, 
bsize, 0); +#if CONFIG_EXT_TX + ext_tx_set = get_ext_tx_set(tx_size, bsize, 1); + for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) { + if (!ext_tx_used_inter[ext_tx_set][tx_type]) + continue; + mbmi->tx_type = tx_type; + if (ext_tx_set == 1 && + mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX && + *best_tx == DCT_DCT) { + tx_type = IDTX - 1; + break; + } + vp10_txfm_rd_in_plane_supertx(x, +#if CONFIG_VAR_TX + cpi, +#endif + &this_rate, &this_dist, &pnskip, + &pnsse, INT64_MAX, 0, bsize, tx_size, 0); + if (get_ext_tx_types(tx_size, bsize, 1) > 1 && + !xd->lossless[xd->mi[0]->mbmi.segment_id] && + this_rate != INT_MAX) { + if (ext_tx_set > 0) + this_rate += cpi->inter_tx_type_costs[ext_tx_set] + [mbmi->tx_size][mbmi->tx_type]; + } + *tmp_rate = rate_uv + this_rate; + *tmp_dist = dist_uv + this_dist; + sse = sse_uv + pnsse; + skippable = skippable_uv && pnskip; + if (skippable) { + *tmp_rate = vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1); + x->skip = 1; + } else { + if (RDCOST(x->rdmult, x->rddiv, *tmp_rate, *tmp_dist) + < RDCOST(x->rdmult, x->rddiv, 0, sse)) { + *tmp_rate += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0); + x->skip = 0; + } else { + *tmp_dist = sse; + *tmp_rate = vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1); + x->skip = 1; + } + } + *tmp_rate += base_rate; + rd_tx = RDCOST(x->rdmult, x->rddiv, *tmp_rate, *tmp_dist); + if (rd_tx < bestrd_tx * 0.99 || tx_type == DCT_DCT) { + *best_tx = tx_type; + bestrd_tx = rd_tx; + tmp_rate_tx = *tmp_rate; + tmp_dist_tx = *tmp_dist; + skip_tx = x->skip; + tmp_zcoeff_blk = x->zcoeff_blk[tx_size][0]; + } + } + x->zcoeff_blk[tx_size][0] = tmp_zcoeff_blk; + *tmp_rate = tmp_rate_tx; + *tmp_dist = tmp_dist_tx; + x->skip = skip_tx; + xd->mi[0]->mbmi.tx_type = best_tx_nostx; + +#else // CONFIG_EXT_TX + + vp10_txfm_rd_in_plane_supertx(x, +#if CONFIG_VAR_TX + cpi, +#endif + &this_rate, &this_dist, &pnskip, &pnsse, + INT64_MAX, 0, bsize, tx_size, 0); + *tmp_rate = rate_uv + this_rate; + *tmp_dist = dist_uv + this_dist; + 
sse = sse_uv + pnsse; + skippable = skippable_uv && pnskip; + if (skippable) { + *tmp_rate = vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1); + x->skip = 1; + } else { + if (RDCOST(x->rdmult, x->rddiv, *tmp_rate, *tmp_dist) + < RDCOST(x->rdmult, x->rddiv, 0, sse)) { + *tmp_rate += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0); + x->skip = 0; + } else { + *tmp_dist = sse; + *tmp_rate = vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1); + x->skip = 1; + } + } + *tmp_rate += base_rate; +#endif // CONFIG_EXT_TX +} +#endif // CONFIG_SUPERTX diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c index 15321cd53..68cf93298 100644 --- a/vp10/encoder/encodemb.c +++ b/vp10/encoder/encodemb.c @@ -696,6 +696,30 @@ void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { } } +#if CONFIG_SUPERTX +void vp10_encode_sb_supertx(MACROBLOCK *x, BLOCK_SIZE bsize) { + MACROBLOCKD *const xd = &x->e_mbd; + struct optimize_ctx ctx; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + struct encode_b_args arg = {x, &ctx, &mbmi->skip}; + int plane; + + mbmi->skip = 1; + if (x->skip) + return; + + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane* const pd = &xd->plane[plane]; + const TX_SIZE tx_size = plane ? 
get_uv_tx_size(mbmi, pd) : mbmi->tx_size; + vp10_subtract_plane(x, bsize, plane); + vp10_get_entropy_contexts(bsize, tx_size, pd, + ctx.ta[plane], ctx.tl[plane]); + vp10_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block, + &arg); + } +} +#endif // CONFIG_SUPERTX + void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { diff --git a/vp10/encoder/encodemb.h b/vp10/encoder/encodemb.h index 01ecc95cc..e208c8879 100644 --- a/vp10/encoder/encodemb.h +++ b/vp10/encoder/encodemb.h @@ -33,6 +33,9 @@ typedef enum VP10_XFORM_QUANT { } VP10_XFORM_QUANT; void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize); +#if CONFIG_SUPERTX +void vp10_encode_sb_supertx(MACROBLOCK *x, BLOCK_SIZE bsize); +#endif // CONFIG_SUPERTX void vp10_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize); void vp10_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row, int blk_col, diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index 57cd31d8a..661186846 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -457,11 +457,11 @@ static int cost_coeffs(MACROBLOCK *x, const int16_t *cat6_high_cost = vp10_get_high_cost_table(8); #endif -#if !CONFIG_VAR_TX +#if !CONFIG_VAR_TX && !CONFIG_SUPERTX // Check for consistency of tx_size with mode info assert(type == PLANE_TYPE_Y ? 
mbmi->tx_size == tx_size : get_uv_tx_size(mbmi, pd) == tx_size); -#endif +#endif // !CONFIG_VAR_TX && !CONFIG_SUPERTX if (eob == 0) { // single eob token @@ -732,6 +732,54 @@ static void txfm_rd_in_plane(MACROBLOCK *x, } } +#if CONFIG_SUPERTX +void vp10_txfm_rd_in_plane_supertx(MACROBLOCK *x, +#if CONFIG_VAR_TX + const VP10_COMP *cpi, +#endif + int *rate, int64_t *distortion, + int *skippable, int64_t *sse, + int64_t ref_best_rd, int plane, + BLOCK_SIZE bsize, TX_SIZE tx_size, + int use_fast_coef_casting) { + MACROBLOCKD *const xd = &x->e_mbd; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + struct rdcost_block_args args; + TX_TYPE tx_type; + + vp10_zero(args); + args.x = x; +#if CONFIG_VAR_TX + args.cpi = cpi; +#endif + args.best_rd = ref_best_rd; + args.use_fast_coef_costing = use_fast_coef_casting; + + if (plane == 0) + xd->mi[0]->mbmi.tx_size = tx_size; + + vp10_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left); + + tx_type = get_tx_type(pd->plane_type, xd, 0, tx_size); + args.so = get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); + + block_rd_txfm(plane, 0, 0, 0, get_plane_block_size(bsize, pd), + tx_size, &args); + + if (args.exit_early) { + *rate = INT_MAX; + *distortion = INT64_MAX; + *sse = INT64_MAX; + *skippable = 0; + } else { + *distortion = args.this_dist; + *rate = args.this_rate; + *sse = args.this_sse; + *skippable = !x->plane[plane].eobs[0]; + } +} +#endif // CONFIG_SUPERTX + static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, int *skip, int64_t *sse, @@ -4855,7 +4903,11 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x, int mi_row, int mi_col, - RD_COST *rd_cost, BLOCK_SIZE bsize, + RD_COST *rd_cost, +#if CONFIG_SUPERTX + int *returnrate_nocoef, +#endif // CONFIG_SUPERTX + BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) { VP10_COMMON *const cm = &cpi->common; @@ -4954,6 +5006,9 @@ void 
vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, } rd_cost->rate = INT_MAX; +#if CONFIG_SUPERTX + *returnrate_nocoef = INT_MAX; +#endif // CONFIG_SUPERTX for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { x->pred_mv_sad[ref_frame] = INT_MAX; @@ -5375,7 +5430,8 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, if (skippable) { // Back out the coefficient coding costs rate2 -= (rate_y + rate_uv); - + rate_y = 0; + rate_uv = 0; // Cost the skip mb case rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1); @@ -5391,6 +5447,8 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, assert(total_sse >= 0); rate2 -= (rate_y + rate_uv); this_skip2 = 1; + rate_y = 0; + rate_uv = 0; } } else { // Add in the cost of the no skip flag. @@ -5437,6 +5495,15 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, } rd_cost->rate = rate2; +#if CONFIG_SUPERTX + *returnrate_nocoef = rate2 - rate_y - rate_uv; + if (!disable_skip) { + *returnrate_nocoef -= vp10_cost_bit(vp10_get_skip_prob(cm, xd), + skippable || this_skip2); + } + *returnrate_nocoef -= vp10_cost_bit(vp10_get_intra_inter_prob(cm, xd), + mbmi->ref_frame[0] != INTRA_FRAME); +#endif // CONFIG_SUPERTX rd_cost->dist = distortion2; rd_cost->rdcost = this_rd; best_rd = this_rd; @@ -5760,14 +5827,17 @@ void vp10_rd_pick_inter_mode_sb_seg_skip(VP10_COMP *cpi, best_pred_diff, best_filter_diff, 0); } -void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi, - TileDataEnc *tile_data, - MACROBLOCK *x, - int mi_row, int mi_col, - RD_COST *rd_cost, - BLOCK_SIZE bsize, - PICK_MODE_CONTEXT *ctx, - int64_t best_rd_so_far) { +void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi, + TileDataEnc *tile_data, + struct macroblock *x, + int mi_row, int mi_col, + struct RD_COST *rd_cost, +#if CONFIG_SUPERTX + int *returnrate_nocoef, +#endif // CONFIG_SUPERTX + BLOCK_SIZE bsize, + PICK_MODE_CONTEXT *ctx, + int64_t best_rd_so_far) { VP10_COMMON *const cm = &cpi->common; RD_OPT *const rd_opt = &cpi->rd; SPEED_FEATURES *const sf = &cpi->sf; @@ 
-5816,6 +5886,11 @@ void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi, int internal_active_edge = vp10_active_edge_sb(cpi, mi_row, mi_col) && vp10_internal_image_edge(cpi); +#if CONFIG_SUPERTX + best_rd_so_far = INT64_MAX; + best_rd = best_rd_so_far; + best_yrd = best_rd_so_far; +#endif // CONFIG_SUPERTX memset(x->zcoeff_blk[TX_4X4], 0, 4); vp10_zero(best_mbmode); @@ -5843,6 +5918,9 @@ void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi, rate_uv_intra = INT_MAX; rd_cost->rate = INT_MAX; +#if CONFIG_SUPERTX + *returnrate_nocoef = INT_MAX; +#endif for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { if (cpi->ref_frame_flags & flag_list[ref_frame]) { @@ -6300,6 +6378,15 @@ void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi, } rd_cost->rate = rate2; +#if CONFIG_SUPERTX + *returnrate_nocoef = rate2 - rate_y - rate_uv; + if (!disable_skip) + *returnrate_nocoef -= vp10_cost_bit(vp10_get_skip_prob(cm, xd), + this_skip2); + *returnrate_nocoef -= vp10_cost_bit(vp10_get_intra_inter_prob(cm, xd), + mbmi->ref_frame[0] != INTRA_FRAME); + assert(*returnrate_nocoef > 0); +#endif // CONFIG_SUPERTX rd_cost->dist = distortion2; rd_cost->rdcost = this_rd; best_rd = this_rd; @@ -6402,6 +6489,9 @@ void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi, if (best_rd >= best_rd_so_far) { rd_cost->rate = INT_MAX; rd_cost->rdcost = INT64_MAX; +#if CONFIG_SUPERTX + *returnrate_nocoef = INT_MAX; +#endif // CONFIG_SUPERTX return; } @@ -6422,6 +6512,9 @@ void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi, rd_cost->rate = INT_MAX; rd_cost->dist = INT64_MAX; rd_cost->rdcost = INT64_MAX; +#if CONFIG_SUPERTX + *returnrate_nocoef = INT_MAX; +#endif // CONFIG_SUPERTX return; } diff --git a/vp10/encoder/rdopt.h b/vp10/encoder/rdopt.h index b1a803627..62b0aeaeb 100644 --- a/vp10/encoder/rdopt.h +++ b/vp10/encoder/rdopt.h @@ -43,6 +43,9 @@ void vp10_rd_pick_inter_mode_sb(struct VP10_COMP *cpi, struct macroblock *x, int mi_row, int mi_col, struct RD_COST *rd_cost, +#if CONFIG_SUPERTX + int 
*returnrate_nocoef, +#endif // CONFIG_SUPERTX BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far); @@ -60,12 +63,27 @@ int vp10_active_v_edge(struct VP10_COMP *cpi, int mi_col, int mi_step); int vp10_active_edge_sb(struct VP10_COMP *cpi, int mi_row, int mi_col); void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi, - struct TileDataEnc *tile_data, - struct macroblock *x, - int mi_row, int mi_col, - struct RD_COST *rd_cost, - BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, - int64_t best_rd_so_far); + struct TileDataEnc *tile_data, + struct macroblock *x, + int mi_row, int mi_col, + struct RD_COST *rd_cost, +#if CONFIG_SUPERTX + int *returnrate_nocoef, +#endif // CONFIG_SUPERTX + BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, + int64_t best_rd_so_far); + +#if CONFIG_SUPERTX +void vp10_txfm_rd_in_plane_supertx(MACROBLOCK *x, +#if CONFIG_VAR_TX + const VP10_COMP *cpi, +#endif // CONFIG_VAR_TX + int *rate, int64_t *distortion, + int *skippable, int64_t *sse, + int64_t ref_best_rd, int plane, + BLOCK_SIZE bsize, TX_SIZE tx_size, + int use_fast_coef_casting); +#endif // CONFIG_SUPERTX #ifdef __cplusplus } // extern "C" diff --git a/vp10/encoder/tokenize.c b/vp10/encoder/tokenize.c index 725b857f4..64211a9c9 100644 --- a/vp10/encoder/tokenize.c +++ b/vp10/encoder/tokenize.c @@ -789,3 +789,40 @@ void vp10_tokenize_sb(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t, vp10_foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg); } } + +#if CONFIG_SUPERTX +void vp10_tokenize_sb_supertx(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t, + int dry_run, BLOCK_SIZE bsize) { + VP10_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &td->mb.e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + TOKENEXTRA *t_backup = *t; + const int ctx = vp10_get_skip_context(xd); + const int skip_inc = !segfeature_active(&cm->seg, mbmi->segment_id, + SEG_LVL_SKIP); + struct tokenize_b_args arg = {cpi, td, t}; + if (mbmi->skip) { + if (!dry_run) + td->counts->skip[ctx][1] += 
skip_inc; + reset_skip_context(xd, bsize); + if (dry_run) + *t = t_backup; + return; + } + + if (!dry_run) { + int plane; + td->counts->skip[ctx][0] += skip_inc; + + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + vp10_foreach_transformed_block_in_plane(xd, bsize, plane, tokenize_b, + &arg); + (*t)->token = EOSB_TOKEN; + (*t)++; + } + } else { + vp10_foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg); + *t = t_backup; + } +} +#endif // CONFIG_SUPERTX diff --git a/vp10/encoder/tokenize.h b/vp10/encoder/tokenize.h index 0c9f7da2d..c68e6f220 100644 --- a/vp10/encoder/tokenize.h +++ b/vp10/encoder/tokenize.h @@ -62,6 +62,10 @@ void vp10_tokenize_palette_sb(struct ThreadData *const td, TOKENEXTRA **t); void vp10_tokenize_sb(struct VP10_COMP *cpi, struct ThreadData *td, TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize); +#if CONFIG_SUPERTX +void vp10_tokenize_sb_supertx(struct VP10_COMP *cpi, struct ThreadData *td, + TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize); +#endif extern const int16_t *vp10_dct_value_cost_ptr; /* TODO: The Token field should be broken out into a separate char array to