From d00b8e5f82069a2ce819f1ee1588d52d04cbf65c Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Fri, 28 Jun 2013 10:40:21 -0700 Subject: [PATCH] Inline vp9_get_coef_context() (and remove vp9_ prefix). Makes cost_coeffs() a lot faster: 4x4: 236 -> 181 cycles 8x8: 888 -> 588 cycles 16x16: 3550 -> 2483 cycles 32x32: 17392 -> 12010 cycles Total encode time of first 50 frames of bus (speed 0) @ 1500kbps goes from 2min51.6 to 2min43.9, i.e. 4.7% overall speedup. Change-Id: I16b8d595946393c8dc661599550b3f37f5718896 --- vp9/common/vp9_entropy.c | 26 -------------------------- vp9/common/vp9_entropy.h | 23 +++++++++++++++++++++-- vp9/decoder/vp9_detokenize.c | 8 ++++---- vp9/encoder/vp9_encodemb.c | 2 +- vp9/encoder/vp9_rdopt.c | 4 ++-- vp9/encoder/vp9_tokenize.c | 2 +- 6 files changed, 29 insertions(+), 36 deletions(-) diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c index ec61f990f..d5f94ed76 100644 --- a/vp9/common/vp9_entropy.c +++ b/vp9/common/vp9_entropy.c @@ -445,32 +445,6 @@ vp9_extra_bit vp9_extra_bits[12] = { #include "vp9/common/vp9_default_coef_probs.h" -// This function updates and then returns n AC coefficient context -// This is currently a placeholder function to allow experimentation -// using various context models based on the energy earlier tokens -// within the current block. -// -// For now it just returns the previously used context. -#define MAX_NEIGHBORS 2 -int vp9_get_coef_context(const int *scan, const int *neighbors, - int nb_pad, uint8_t *token_cache, int c, int l) { - int eob = l; - assert(nb_pad == MAX_NEIGHBORS); - if (c == eob) { - return 0; - } else { - int ctx; - assert(neighbors[MAX_NEIGHBORS * c + 0] >= 0); - if (neighbors[MAX_NEIGHBORS * c + 1] >= 0) { - ctx = (1 + token_cache[scan[neighbors[MAX_NEIGHBORS * c + 0]]] + - token_cache[scan[neighbors[MAX_NEIGHBORS * c + 1]]]) >> 1; - } else { - ctx = token_cache[scan[neighbors[MAX_NEIGHBORS * c + 0]]]; - } - return ctx; - } -}; - void vp9_default_coef_probs(VP9_COMMON *pc) { vpx_memcpy(pc->fc.coef_probs[TX_4X4], default_coef_probs_4x4, sizeof(pc->fc.coef_probs[TX_4X4])); diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h index e9a47daed..665b5d8a4 100644 --- a/vp9/common/vp9_entropy.h +++ b/vp9/common/vp9_entropy.h @@ -148,8 +148,27 @@ static int get_coef_band(const uint8_t * band_translate, int coef_index) { ? (COEF_BANDS-1) : band_translate[coef_index]; } -extern int vp9_get_coef_context(const int *scan, const int *neighbors, - int nb_pad, uint8_t *token_cache, int c, int l); +#define MAX_NEIGHBORS 2 +static INLINE int get_coef_context(const int *scan, const int *neighbors, + int nb_pad, uint8_t *token_cache, + int c, int l) { + int eob = l; + assert(nb_pad == MAX_NEIGHBORS); + if (c == eob) { + return 0; + } else { + int ctx; + assert(neighbors[MAX_NEIGHBORS * c + 0] >= 0); + if (neighbors[MAX_NEIGHBORS * c + 1] >= 0) { + ctx = (1 + token_cache[scan[neighbors[MAX_NEIGHBORS * c + 0]]] + + token_cache[scan[neighbors[MAX_NEIGHBORS * c + 1]]]) >> 1; + } else { + ctx = token_cache[scan[neighbors[MAX_NEIGHBORS * c + 0]]]; + } + return ctx; + } +} + const int *vp9_get_coef_neighbors_handle(const int *scan, int *pad); diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 343b4f26d..bc1c58aa3 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -172,8 +172,8 @@ static int decode_coefs(FRAME_CONTEXT *fc, const MACROBLOCKD *xd, if (c >= seg_eob) break; if (c) - pt = vp9_get_coef_context(scan, nb, pad, token_cache, - c, default_eob); + pt = get_coef_context(scan, nb, pad, token_cache, + c, default_eob); band = get_coef_band(band_translate, c); prob = coef_probs[band][pt]; #if !CONFIG_BALANCED_COEFTREE @@ -186,8 +186,8 @@ SKIP_START: if (c >= seg_eob) break; if (c) - pt = vp9_get_coef_context(scan, nb, pad, token_cache, - c, default_eob); + pt = get_coef_context(scan, nb, pad, token_cache, + c, default_eob); band = get_coef_band(band_translate, c); prob = coef_probs[band][pt]; diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 4cfdff387..e69624832 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -116,7 +116,7 @@ static int trellis_get_coeff_context(const int *scan, int pad, int l) { int bak = token_cache[scan[idx]], pt; token_cache[scan[idx]] = vp9_pt_energy_class[token]; - pt = vp9_get_coef_context(scan, nb, pad, token_cache, idx + 1, l); + pt = get_coef_context(scan, nb, pad, token_cache, idx + 1, l); token_cache[scan[idx]] = bak; return pt; } diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index eef9349eb..e508a7eda 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -402,7 +402,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, v = qcoeff_ptr[rc]; t = vp9_dct_value_tokens_ptr[v].token; - pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob); + pt = get_coef_context(scan, nb, pad, token_cache, c, default_eob); cost += token_costs[!prev_t][band][pt][t] + vp9_dct_value_cost_ptr[v]; token_cache[rc] = vp9_pt_energy_class[t]; prev_t = t; @@ -410,7 +410,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, // eob token if (c < seg_eob) { - pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob); + pt = get_coef_context(scan, nb, pad, token_cache, c, default_eob); cost += token_costs[0][get_coef_band(band_translate, c)][pt] [DCT_EOB_TOKEN]; } diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 885079dc1..abf05c254 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -193,7 +193,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, int v = 0; rc = scan[c]; if (c) - pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob); + pt = get_coef_context(scan, nb, pad, token_cache, c, default_eob); if (c < eob) { v = qcoeff_ptr[rc]; assert(-DCT_MAX_VALUE <= v && v < DCT_MAX_VALUE); -- 2.40.0