From c6eba0b47aa4a88da7513dcd4834fd30585cd10c Mon Sep 17 00:00:00 2001 From: Julia Robson Date: Wed, 17 Feb 2016 15:52:31 +0000 Subject: [PATCH] cost_coeff speed improvements Preliminary tests indicated that these changes make cost_coeffs approximately 20% faster which is a 2% improvement overall Change-Id: Iaf013ba75884415cd824e98349f654ffb1c3ef33 --- vp10/encoder/rdopt.c | 99 +++++++++++++++++++++++++---------------- vp10/encoder/tokenize.c | 29 ++++++++++++ vp10/encoder/tokenize.h | 13 ++++++ 3 files changed, 103 insertions(+), 38 deletions(-) diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index 736adbbab..784ee90e7 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -539,47 +539,70 @@ static int cost_coeffs(MACROBLOCK *x, cost = token_costs[0][0][pt][EOB_TOKEN]; c = 0; } else { - int band_left = *band_count++; - - // dc token - int v = qcoeff[0]; - int16_t prev_t; - EXTRABIT e; - vp10_get_token_extra(v, &prev_t, &e); - cost = (*token_costs)[0][pt][prev_t] + - vp10_get_cost(prev_t, e, cat6_high_cost); - - token_cache[0] = vp10_pt_energy_class[prev_t]; - ++token_costs; - - // ac tokens - for (c = 1; c < eob; c++) { - const int rc = scan[c]; - int16_t t; - - v = qcoeff[rc]; - vp10_get_token_extra(v, &t, &e); - if (use_fast_coef_costing) { - cost += (*token_costs)[!prev_t][!prev_t][t] + - vp10_get_cost(t, e, cat6_high_cost); - } else { - pt = get_coef_context(nb, token_cache, c); - cost += (*token_costs)[!prev_t][pt][t] + - vp10_get_cost(t, e, cat6_high_cost); - token_cache[rc] = vp10_pt_energy_class[t]; - } - prev_t = t; - if (!--band_left) { - band_left = *band_count++; - ++token_costs; + if (use_fast_coef_costing) { + int band_left = *band_count++; + + // dc token: qcoeff[0], costed with context pt + int v = qcoeff[0]; + int16_t prev_t; + cost = vp10_get_token_cost(v, &prev_t, cat6_high_cost); + cost += (*token_costs)[0][pt][prev_t]; + + token_cache[0] = vp10_pt_energy_class[prev_t]; + ++token_costs; + + // ac tokens: fast costing uses !prev_t as context, not get_coef_context() + for (c = 1; c < eob; c++) { + const int rc = scan[c]; + 
int16_t t; + + v = qcoeff[rc]; + cost += vp10_get_token_cost(v, &t, cat6_high_cost); + cost += (*token_costs)[!prev_t][!prev_t][t]; + prev_t = t; + if (!--band_left) { + band_left = *band_count++; + ++token_costs; + } } - } - // eob token - if (band_left) { - if (use_fast_coef_costing) { + // eob token: added only while band_left is non-zero + if (band_left) cost += (*token_costs)[0][!prev_t][EOB_TOKEN]; - } else { + + } else { // !use_fast_coef_costing: contexts come from get_coef_context() + int band_left = *band_count++; + + // dc token: qcoeff[0], costed with context pt + int v = qcoeff[0]; + int16_t tok; + unsigned int (*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS]; + cost = vp10_get_token_cost(v, &tok, cat6_high_cost); + cost += (*token_costs)[0][pt][tok]; + + token_cache[0] = vp10_pt_energy_class[tok]; + ++token_costs; + + tok_cost_ptr = &((*token_costs)[!tok]); + + // ac tokens: tok_cost_ptr was set from the previous token's !tok + for (c = 1; c < eob; c++) { + const int rc = scan[c]; + + v = qcoeff[rc]; + cost += vp10_get_token_cost(v, &tok, cat6_high_cost); + pt = get_coef_context(nb, token_cache, c); + cost += (*tok_cost_ptr)[pt][tok]; + token_cache[rc] = vp10_pt_energy_class[tok]; + if (!--band_left) { + band_left = *band_count++; + ++token_costs; + } + tok_cost_ptr = &((*token_costs)[!tok]); + } + + // eob token: added only while band_left is non-zero + if (band_left) { pt = get_coef_context(nb, token_cache, c); cost += (*token_costs)[0][pt][EOB_TOKEN]; } diff --git a/vp10/encoder/tokenize.c b/vp10/encoder/tokenize.c index 0aaeb2ab5..5cae8e302 100644 --- a/vp10/encoder/tokenize.c +++ b/vp10/encoder/tokenize.c @@ -50,6 +50,35 @@ static const TOKENVALUE dct_cat_lt_10_value_tokens[] = { const TOKENVALUE *vp10_dct_cat_lt_10_value_tokens = dct_cat_lt_10_value_tokens + (sizeof(dct_cat_lt_10_value_tokens) / sizeof(*dct_cat_lt_10_value_tokens)) / 2; +// Extra-bit cost of every entry in the token table above, kept in the same +// order so that one coefficient value indexes both tables. Each cost is the +// entry selected by the token's extrabits in that token's own cost table +// (the cost element of vp10_extra_bits) +// e.g. 
{9, 63} maps to cat5_cost[63 >> 1], {1, 1} maps to sign_cost[1 >> 1] +static const int dct_cat_lt_10_value_cost[] = { + 3773, 3750, 3704, 3681, 3623, 3600, 3554, 3531, + 3432, 3409, 3363, 3340, 3282, 3259, 3213, 3190, + 3136, 3113, 3067, 3044, 2986, 2963, 2917, 2894, + 2795, 2772, 2726, 2703, 2645, 2622, 2576, 2553, + 3197, 3116, 3058, 2977, 2881, 2800, + 2742, 2661, 2615, 2534, 2476, 2395, + 2299, 2218, 2160, 2079, + 2566, 2427, 2334, 2195, 2023, 1884, 1791, 1652, + 1893, 1696, 1453, 1256, 1229, 864, + 512, 512, 512, 512, 0, + 512, 512, 512, 512, + 864, 1229, 1256, 1453, 1696, 1893, + 1652, 1791, 1884, 2023, 2195, 2334, 2427, 2566, + 2079, 2160, 2218, 2299, 2395, 2476, 2534, 2615, + 2661, 2742, 2800, 2881, 2977, 3058, 3116, 3197, + 2553, 2576, 2622, 2645, 2703, 2726, 2772, 2795, + 2894, 2917, 2963, 2986, 3044, 3067, 3113, 3136, + 3190, 3213, 3259, 3282, 3340, 3363, 3409, 3432, + 3531, 3554, 3600, 3623, 3681, 3704, 3750, 3773, +}; +const int *vp10_dct_cat_lt_10_value_cost = dct_cat_lt_10_value_cost + + (sizeof(dct_cat_lt_10_value_cost) / sizeof(*dct_cat_lt_10_value_cost)) + / 2; // Array indices are identical to previously-existing CONTEXT_NODE indices const vpx_tree_index vp10_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = { diff --git a/vp10/encoder/tokenize.h b/vp10/encoder/tokenize.h index 12f5f1fc3..46b7f3f35 100644 --- a/vp10/encoder/tokenize.h +++ b/vp10/encoder/tokenize.h @@ -76,6 +76,7 @@ extern const int16_t *vp10_dct_value_cost_ptr; */ extern const TOKENVALUE *vp10_dct_value_tokens_ptr; extern const TOKENVALUE *vp10_dct_cat_lt_10_value_tokens; +extern const int *vp10_dct_cat_lt_10_value_cost; extern const int16_t vp10_cat6_low_cost[256]; extern const int vp10_cat6_high_cost[64]; extern const int vp10_cat6_high10_high_cost[256]; @@ -119,6 +120,18 @@ static INLINE int16_t vp10_get_token(int v) { return vp10_dct_cat_lt_10_value_tokens[v].token; } +static INLINE int vp10_get_token_cost(int v, int16_t *token, + const int *cat6_high_table) { + if (v >= CAT6_MIN_VAL || 
v <= -CAT6_MIN_VAL) { + EXTRABIT extrabits; + *token = CATEGORY6_TOKEN; + extrabits = abs(v) - CAT6_MIN_VAL; + return vp10_cat6_low_cost[extrabits & 0xff] + + cat6_high_table[extrabits >> 8]; + } + *token = vp10_dct_cat_lt_10_value_tokens[v].token; + return vp10_dct_cat_lt_10_value_cost[v]; +} #ifdef __cplusplus } // extern "C" -- 2.40.0