From: Alex Converse Date: Wed, 17 Feb 2016 21:39:44 +0000 (-0800) Subject: Better workaround for Bug 1089. X-Git-Tag: v1.6.0~350 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=09f9c5d7f90bd19dfc1994926a6e35680ba9545c;p=libvpx Better workaround for Bug 1089. Don't initialize first pass costs for a number of symbols where first pass probabilities aren't initialized. This brings a 1.22x first pass speedup. https://bugs.chromium.org/p/webm/issues/detail?id=1089 Change-Id: I97438c357bd88f52f5a15c697031cf0c3cc8f510 --- diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index fc022093c..7b490af34 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -728,10 +728,8 @@ static const vp9_coeff_probs_model default_coef_probs_32x32[PLANE_TYPES] = { }; static void extend_to_full_distribution(vpx_prob *probs, vpx_prob p) { - // TODO(aconverse): model[PIVOT_NODE] should never be zero. - // https://code.google.com/p/webm/issues/detail?id=1089 - memcpy(probs, vp9_pareto8_full[p == 0 ? 254 : p - 1], - MODEL_NODES * sizeof(vpx_prob)); + assert(p != 0); + memcpy(probs, vp9_pareto8_full[p - 1], MODEL_NODES * sizeof(vpx_prob)); } void vp9_model_to_full_probs(const vpx_prob *model, vpx_prob *full) { diff --git a/vp9/encoder/vp9_cost.c b/vp9/encoder/vp9_cost.c index c85f76322..5d14742bc 100644 --- a/vp9/encoder/vp9_cost.c +++ b/vp9/encoder/vp9_cost.c @@ -12,9 +12,8 @@ #include "vp9/encoder/vp9_cost.h" /* round(-log2(i/256.) * (1 << VP9_PROB_COST_SHIFT)) - Begins and ends with a bogus entry to satisfy use of prob=0 in the firstpass. - https://code.google.com/p/webm/issues/detail?id=1089 */ -const uint16_t vp9_prob_cost[257] = { + Begins with a bogus entry for simpler addressing. */ +const uint16_t vp9_prob_cost[256] = { 4096, 4096, 3584, 3284, 3072, 2907, 2772, 2659, 2560, 2473, 2395, 2325, 2260, 2201, 2147, 2096, 2048, 2003, 1961, 1921, 1883, 1847, 1813, 1780, 1748, 1718, 1689, 1661, 1635, 1609, 1584, 1559, 1536, 1513, 1491, 1470, @@ -36,13 +35,14 @@ const uint16_t vp9_prob_cost[257] = { 125, 122, 119, 115, 112, 109, 105, 102, 99, 95, 92, 89, 86, 82, 79, 76, 73, 70, 66, 63, 60, 57, 54, 51, 48, 45, 42, 38, 35, 32, 29, 26, 23, 20, 18, 15, - 12, 9, 6, 3, 3}; + 12, 9, 6, 3}; static void cost(int *costs, vpx_tree tree, const vpx_prob *probs, int i, int c) { const vpx_prob prob = probs[i / 2]; int b; + assert(prob != 0); for (b = 0; b <= 1; ++b) { const int cc = c + vp9_cost_bit(prob, b); const vpx_tree_index ii = tree[i + b]; diff --git a/vp9/encoder/vp9_cost.h b/vp9/encoder/vp9_cost.h index 9831013b1..0c70b7826 100644 --- a/vp9/encoder/vp9_cost.h +++ b/vp9/encoder/vp9_cost.h @@ -18,7 +18,7 @@ extern "C" { #endif -extern const uint16_t vp9_prob_cost[257]; +extern const uint16_t vp9_prob_cost[256]; // The factor to scale from cost in bits to cost in vp9_prob_cost units. #define VP9_PROB_COST_SHIFT 9 diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c index fc32d1911..fba45adc7 100644 --- a/vp9/encoder/vp9_rd.c +++ b/vp9/encoder/vp9_rd.c @@ -286,29 +286,37 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { set_block_thresholds(cm, rd); set_partition_probs(cm, xd); - if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) - fill_token_costs(x->token_costs, cm->fc->coef_probs); - - if (cpi->sf.partition_search_type != VAR_BASED_PARTITION || - cm->frame_type == KEY_FRAME) { - for (i = 0; i < PARTITION_CONTEXTS; ++i) - vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i), - vp9_partition_tree); - } + if (cpi->oxcf.pass == 1) { + if (!frame_is_intra_only(cm)) + vp9_build_nmv_cost_table( + x->nmvjointcost, + cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost, + &cm->fc->nmvc, cm->allow_high_precision_mv); + } else { + if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) + fill_token_costs(x->token_costs, cm->fc->coef_probs); + + if (cpi->sf.partition_search_type != VAR_BASED_PARTITION || + cm->frame_type == KEY_FRAME) { + for (i = 0; i < PARTITION_CONTEXTS; ++i) + vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i), + vp9_partition_tree); + } - if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 || - cm->frame_type == KEY_FRAME) { - fill_mode_costs(cpi); + if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 || + cm->frame_type == KEY_FRAME) { + fill_mode_costs(cpi); - if (!frame_is_intra_only(cm)) { - vp9_build_nmv_cost_table(x->nmvjointcost, - cm->allow_high_precision_mv ? x->nmvcost_hp - : x->nmvcost, - &cm->fc->nmvc, cm->allow_high_precision_mv); + if (!frame_is_intra_only(cm)) { + vp9_build_nmv_cost_table( + x->nmvjointcost, + cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost, + &cm->fc->nmvc, cm->allow_high_precision_mv); - for (i = 0; i < INTER_MODE_CONTEXTS; ++i) - vp9_cost_tokens((int *)cpi->inter_mode_cost[i], - cm->fc->inter_mode_probs[i], vp9_inter_mode_tree); + for (i = 0; i < INTER_MODE_CONTEXTS; ++i) + vp9_cost_tokens((int *)cpi->inter_mode_cost[i], + cm->fc->inter_mode_probs[i], vp9_inter_mode_tree); + } } } }