From: Sarah Parker Date: Tue, 1 Mar 2016 18:12:13 +0000 (-0800) Subject: Adding speed feature interface for ext tx search X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2ca7d42e7edec19a8e23ba2a08b464035cd92d7b;p=libvpx Adding speed feature interface for ext tx search This sets up the interface for 3 speed features that progressively eliminate a greater number of transforms in ext tx using pre-trained support vector machines. Each speed feature still needs to be implemented. Change-Id: Ia508aeadc0cffdc080fb227f357a5d1dfbca08e2 --- diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index 96edc0ff7..b54e40257 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -354,6 +354,126 @@ static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, } } +#if CONFIG_EXT_TX +typedef enum { + DCT_1D = 0, + ADST_1D = 1, + FLIPADST_1D = 2, + DST_1D = 3, + TX_TYPES_1D = 4, +} TX_TYPE_1D; + +static int prune_two_for_sby(const VP10_COMP *cpi, + BLOCK_SIZE bsize, + MACROBLOCK *x, + MACROBLOCKD *xd) { + (void) cpi; + (void) bsize; + (void) x; + (void) xd; + return 3; +} + +static int prune_three_for_sby(const VP10_COMP *cpi, + BLOCK_SIZE bsize, + MACROBLOCK *x, + MACROBLOCKD *xd) { + (void) cpi; + (void) bsize; + (void) x; + (void) xd; + return 7; +} + +#endif // CONFIG_EXT_TX + +static int prune_one_for_sby(const VP10_COMP *cpi, + BLOCK_SIZE bsize, + MACROBLOCK *x, + MACROBLOCKD *xd) { + (void) cpi; + (void) bsize; + (void) x; + (void) xd; + return 1; +} + +static int prune_tx_types(const VP10_COMP *cpi, + BLOCK_SIZE bsize, + MACROBLOCK *x, + MACROBLOCKD *xd) { + switch (cpi->sf.tx_type_search) { + case NO_PRUNE: + return 0; + break; + case PRUNE_ONE : + return prune_one_for_sby(cpi, bsize, x, xd); + break; + #if CONFIG_EXT_TX + case PRUNE_TWO : + return prune_two_for_sby(cpi, bsize, x, xd); + break; + case PRUNE_THREE : + return prune_three_for_sby(cpi, bsize, x, xd); + break; + #endif + } + assert(0); + return 0; +} + +static int do_tx_type_search(TX_TYPE tx_type, + int prune) { +// TODO(sarahparker) implement for non ext tx +#if CONFIG_EXT_TX + static TX_TYPE_1D vtx_tab[TX_TYPES] = { + DCT_1D, + ADST_1D, + DCT_1D, + ADST_1D, + FLIPADST_1D, + DCT_1D, + FLIPADST_1D, + ADST_1D, + FLIPADST_1D, + DST_1D, + DCT_1D, + DST_1D, + ADST_1D, + DST_1D, + FLIPADST_1D, + DST_1D, + }; + static TX_TYPE_1D htx_tab[TX_TYPES] = { + DCT_1D, + DCT_1D, + ADST_1D, + ADST_1D, + DCT_1D, + FLIPADST_1D, + FLIPADST_1D, + FLIPADST_1D, + ADST_1D, + DCT_1D, + DST_1D, + ADST_1D, + DST_1D, + FLIPADST_1D, + DST_1D, + DST_1D, + }; + if (tx_type == IDTX) + return 1; + return !(((prune >> vtx_tab[tx_type]) & 1) | + ((prune >> (htx_tab[tx_type] + TX_TYPES_1D)) & 1)); +#else + // temporary to avoid compiler warnings + (void) tx_type; + (void) prune; + return 1; +#endif +} + static void model_rd_for_sb(VP10_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, int *out_rate_sum, int64_t *out_dist_sum, @@ -394,8 +514,10 @@ static void model_rd_for_sb(VP10_COMP *cpi, BLOCK_SIZE bsize, // low enough so that we can skip the mode search. const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2); const int64_t low_ac_thr = VPXMIN(80, ac_thr >> 2); - int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]); - int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]); + int bw_shift = (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]); + int bh_shift = (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]); + int bw = 1 << bw_shift; + int bh = 1 << bh_shift; int idx, idy; int lw = b_width_log2_lookup[unit_size] + 2; int lh = b_height_log2_lookup[unit_size] + 2; @@ -406,7 +528,7 @@ static void model_rd_for_sb(VP10_COMP *cpi, BLOCK_SIZE bsize, for (idx = 0; idx < bw; ++idx) { uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw); uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh); - int block_idx = (idy << 1) + idx; + int block_idx = (idy << bw_shift) + idx; int low_err_skip = 0; var = cpi->fn_ptr[unit_size].vf(src, p->src.stride, @@ -988,11 +1110,14 @@ static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x, vpx_prob skip_prob = vp10_get_skip_prob(cm, xd); int s0 = vp10_cost_bit(skip_prob, 0); int s1 = vp10_cost_bit(skip_prob, 1); + const int is_inter = is_inter_block(mbmi); + int prune = 0; #if CONFIG_EXT_TX int ext_tx_set; #endif // CONFIG_EXT_TX - const int is_inter = is_inter_block(mbmi); + if (is_inter && cpi->sf.tx_type_search > 0) + prune = prune_tx_types(cpi, bs, x, xd); mbmi->tx_size = VPXMIN(max_tx_size, largest_tx_size); #if CONFIG_EXT_TX @@ -1004,6 +1129,15 @@ static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x, if (is_inter) { if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue; + if (cpi->sf.tx_type_search > 0) { + if (!do_tx_type_search(tx_type, prune)) + continue; + } else if (ext_tx_set == 1 && + tx_type >= DST_ADST && tx_type < IDTX && + best_tx_type == DCT_DCT) { + tx_type = IDTX - 1; + continue; + } } else { if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) { if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) @@ -1011,15 +1145,15 @@ static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x, } if (!ext_tx_used_intra[ext_tx_set][tx_type]) continue; + if (ext_tx_set == 1 && + tx_type >= DST_ADST && tx_type < IDTX && + best_tx_type == DCT_DCT) { + tx_type = IDTX - 1; + continue; + } } mbmi->tx_type = tx_type; - if (ext_tx_set == 1 && - mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX && - best_tx_type == DCT_DCT) { - tx_type = IDTX - 1; - continue; - } txfm_rd_in_plane(x, cpi, @@ -1067,12 +1201,15 @@ static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x, cpi->sf.use_fast_coef_costing); if (r == INT_MAX) continue; - if (is_inter) + if (is_inter) { r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type]; - else + if (cpi->sf.tx_type_search > 0 && !do_tx_type_search(tx_type, prune)) + continue; + } else { r += cpi->intra_tx_type_costs[mbmi->tx_size] [intra_mode_to_tx_type_context[mbmi->mode]] [mbmi->tx_type]; + } if (s) this_rd = RDCOST(x->rdmult, x->rddiv, s1, psse); else @@ -1150,13 +1287,17 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE best_tx = max_tx_size; int start_tx, end_tx; const int tx_select = cm->tx_mode == TX_MODE_SELECT; - TX_TYPE tx_type, best_tx_type = DCT_DCT; const int is_inter = is_inter_block(mbmi); + const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs); + TX_TYPE tx_type, best_tx_type = DCT_DCT; + int prune = 0; #if CONFIG_EXT_TX int ext_tx_set; #endif // CONFIG_EXT_TX - const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs); + if (is_inter && cpi->sf.tx_type_search > 0) + prune = prune_tx_types(cpi, bs, x, xd); + assert(skip_prob > 0); s0 = vp10_cost_bit(skip_prob, 0); s1 = vp10_cost_bit(skip_prob, 1); @@ -1187,6 +1328,15 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x, if (is_inter) { if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue; + if (cpi->sf.tx_type_search > 0) { + if (!do_tx_type_search(tx_type, prune)) + continue; + } else if (ext_tx_set == 1 && + tx_type >= DST_ADST && tx_type < IDTX && + best_tx_type == DCT_DCT) { + tx_type = IDTX - 1; + continue; + } } else { if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) { if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) @@ -1194,14 +1344,14 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x, } if (!ext_tx_used_intra[ext_tx_set][tx_type]) continue; + if (ext_tx_set == 1 && + tx_type >= DST_ADST && tx_type < IDTX && + best_tx_type == DCT_DCT) { + tx_type = IDTX - 1; + break; + } } mbmi->tx_type = tx_type; - if (ext_tx_set == 1 && - mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX && - best_tx_type == DCT_DCT) { - tx_type = IDTX - 1; - break; - } txfm_rd_in_plane(x, cpi, &r, &d, &s, @@ -1233,12 +1383,15 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x, if (n < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id] && r != INT_MAX && !FIXED_TX_TYPE) { - if (is_inter) + if (is_inter) { r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type]; - else + if (cpi->sf.tx_type_search > 0 && !do_tx_type_search(tx_type, prune)) + continue; + } else { r += cpi->intra_tx_type_costs[mbmi->tx_size] [intra_mode_to_tx_type_context[mbmi->mode]] [mbmi->tx_type]; + } } #endif // CONFIG_EXT_TX @@ -2803,9 +2956,13 @@ static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x, uint8_t best_blk_skip[256]; const int n4 = 1 << (num_pels_log2_lookup[bsize] - 4); int idx, idy; + int prune = 0; #if CONFIG_EXT_TX int ext_tx_set = get_ext_tx_set(max_tx_size, bsize, is_inter); -#endif +#endif // CONFIG_EXT_TX + + if (is_inter && cpi->sf.tx_type_search > 0) + prune = prune_tx_types(cpi, bsize, x, xd); *distortion = INT64_MAX; *rate = INT_MAX; @@ -2821,6 +2978,15 @@ static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x, if (is_inter) { if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue; + if (cpi->sf.tx_type_search > 0) { + if (!do_tx_type_search(tx_type, prune)) + continue; + } else if (ext_tx_set == 1 && + tx_type >= DST_ADST && tx_type < IDTX && + best_tx_type == DCT_DCT) { + tx_type = IDTX - 1; + continue; + } } else { if (!ALLOW_INTRA_EXT_TX && bsize >= BLOCK_8X8) { if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) @@ -2828,17 +2994,16 @@ static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x, } if (!ext_tx_used_intra[ext_tx_set][tx_type]) continue; + if (ext_tx_set == 1 && + tx_type >= DST_ADST && tx_type < IDTX && + best_tx_type == DCT_DCT) { + tx_type = IDTX - 1; + break; + } } mbmi->tx_type = tx_type; - if (ext_tx_set == 1 && - mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX && - best_tx_type == DCT_DCT) { - tx_type = IDTX - 1; - break; - } - inter_block_yrd(cpi, x, &this_rate, &this_dist, &this_skip, &this_sse, bsize, ref_best_rd); @@ -2867,12 +3032,15 @@ static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x, if (max_tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id] && this_rate != INT_MAX) { - if (is_inter) + if (is_inter) { this_rate += cpi->inter_tx_type_costs[max_tx_size][mbmi->tx_type]; - else + if (cpi->sf.tx_type_search > 0 && !do_tx_type_search(tx_type, prune)) + continue; + } else { this_rate += cpi->intra_tx_type_costs[max_tx_size] [intra_mode_to_tx_type_context[mbmi->mode]] [mbmi->tx_type]; + } } #endif // CONFIG_EXT_TX diff --git a/vp10/encoder/speed_features.c b/vp10/encoder/speed_features.c index 1f70dcbcc..ec8acdae2 100644 --- a/vp10/encoder/speed_features.c +++ b/vp10/encoder/speed_features.c @@ -159,7 +159,7 @@ static void set_good_speed_feature(VP10_COMP *cpi, VP10_COMMON *cm, sf->tx_size_search_breakout = 1; sf->partition_search_breakout_rate_thr = 80; - + sf->tx_type_search = PRUNE_ONE; // Use transform domain distortion. // Note var-tx expt always uses pixel domain distortion. sf->use_transform_domain_distortion = 1; @@ -177,6 +177,9 @@ static void set_good_speed_feature(VP10_COMP *cpi, VP10_COMMON *cm, sf->comp_inter_joint_search_thresh = BLOCK_SIZES; sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->allow_partition_search_skip = 1; +#if CONFIG_EXT_TX + sf->tx_type_search = PRUNE_TWO; +#endif } if (speed >= 3) { @@ -195,6 +198,9 @@ static void set_good_speed_feature(VP10_COMP *cpi, VP10_COMMON *cm, sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC; sf->adaptive_interp_filter_search = 1; +#if CONFIG_EXT_TX + sf->tx_type_search = PRUNE_THREE; +#endif } if (speed >= 4) { @@ -473,6 +479,7 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) { sf->alt_ref_search_fp = 0; sf->use_quant_fp = 0; sf->partition_search_type = SEARCH_PARTITION; + sf->tx_type_search = NO_PRUNE; sf->less_rectangular_check = 0; sf->use_square_partition_only = 0; sf->auto_min_max_partition_size = NOT_IN_USE; diff --git a/vp10/encoder/speed_features.h b/vp10/encoder/speed_features.h index 4f931d80e..fbb69882c 100644 --- a/vp10/encoder/speed_features.h +++ b/vp10/encoder/speed_features.h @@ -181,6 +181,18 @@ typedef enum { FLAG_SKIP_MULTITAP_SHARP = 1 << MULTITAP_SHARP, } INTERP_FILTER_MASK; +typedef enum { + NO_PRUNE = 0, + // eliminates one tx type in vertical and horizontal direction + PRUNE_ONE = 1, +#if CONFIG_EXT_TX + // eliminates two tx types in each direction + PRUNE_TWO = 2, + // eliminates three tx types in each direction + PRUNE_THREE = 3, +#endif +} TX_TYPE_SEARCH; + typedef enum { // Search partitions using RD criterion SEARCH_PARTITION, @@ -298,6 +310,7 @@ typedef struct SPEED_FEATURES { PARTITION_SEARCH_TYPE partition_search_type; + TX_TYPE_SEARCH tx_type_search; // Used if partition_search_type = FIXED_SIZE_PARTITION BLOCK_SIZE always_this_block_size;