From: Yaowu Xu Date: Thu, 22 Jan 2015 23:27:43 +0000 (-0800) Subject: Replace divide with look-up X-Git-Tag: v1.4.0~223^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=eda179764f5b270c7298660a12fb8f9176902e2a;p=libvpx Replace divide with look-up This commit replaces an integer divide with a table-lookup. It is to improve decoding speed, and at the same time, to reduce possible complications with a bug in AMD Family 12h processors: "665 Integer Divide Instruction May Cause Unpredictable Behavior" Change-Id: I678b707a538798a923850bac467e66e847e6def7 --- diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c index 4557e19bf..47e5164d7 100644 --- a/vp9/common/vp9_entropymode.c +++ b/vp9/common/vp9_entropymode.c @@ -334,20 +334,6 @@ const vp9_tree_index vp9_switchable_interp_tree -EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP }; -#define COUNT_SAT 20 -#define MAX_UPDATE_FACTOR 128 - -static int adapt_prob(vp9_prob pre_prob, const unsigned int ct[2]) { - return merge_probs(pre_prob, ct, COUNT_SAT, MAX_UPDATE_FACTOR); -} - -static void adapt_probs(const vp9_tree_index *tree, - const vp9_prob *pre_probs, const unsigned int *counts, - vp9_prob *probs) { - vp9_tree_merge_probs(tree, pre_probs, counts, COUNT_SAT, MAX_UPDATE_FACTOR, - probs); -} - void vp9_adapt_mode_probs(VP9_COMMON *cm) { int i, j; FRAME_CONTEXT *fc = cm->fc; @@ -355,39 +341,41 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) { const FRAME_COUNTS *counts = &cm->counts; for (i = 0; i < INTRA_INTER_CONTEXTS; i++) - fc->intra_inter_prob[i] = adapt_prob(pre_fc->intra_inter_prob[i], - counts->intra_inter[i]); + fc->intra_inter_prob[i] = mode_mv_merge_probs(pre_fc->intra_inter_prob[i], + counts->intra_inter[i]); for (i = 0; i < COMP_INTER_CONTEXTS; i++) - fc->comp_inter_prob[i] = adapt_prob(pre_fc->comp_inter_prob[i], - counts->comp_inter[i]); + fc->comp_inter_prob[i] = mode_mv_merge_probs(pre_fc->comp_inter_prob[i], + counts->comp_inter[i]); for (i = 0; i < REF_CONTEXTS; i++) - fc->comp_ref_prob[i] = adapt_prob(pre_fc->comp_ref_prob[i], - counts->comp_ref[i]); + fc->comp_ref_prob[i] = mode_mv_merge_probs(pre_fc->comp_ref_prob[i], + counts->comp_ref[i]); for (i = 0; i < REF_CONTEXTS; i++) for (j = 0; j < 2; j++) - fc->single_ref_prob[i][j] = adapt_prob(pre_fc->single_ref_prob[i][j], - counts->single_ref[i][j]); + fc->single_ref_prob[i][j] = mode_mv_merge_probs( + pre_fc->single_ref_prob[i][j], counts->single_ref[i][j]); for (i = 0; i < INTER_MODE_CONTEXTS; i++) - adapt_probs(vp9_inter_mode_tree, pre_fc->inter_mode_probs[i], + vp9_tree_merge_probs(vp9_inter_mode_tree, pre_fc->inter_mode_probs[i], counts->inter_mode[i], fc->inter_mode_probs[i]); for (i = 0; i < BLOCK_SIZE_GROUPS; i++) - adapt_probs(vp9_intra_mode_tree, pre_fc->y_mode_prob[i], + vp9_tree_merge_probs(vp9_intra_mode_tree, pre_fc->y_mode_prob[i], counts->y_mode[i], fc->y_mode_prob[i]); for (i = 0; i < INTRA_MODES; ++i) - adapt_probs(vp9_intra_mode_tree, pre_fc->uv_mode_prob[i], - counts->uv_mode[i], fc->uv_mode_prob[i]); + vp9_tree_merge_probs(vp9_intra_mode_tree, pre_fc->uv_mode_prob[i], + counts->uv_mode[i], fc->uv_mode_prob[i]); for (i = 0; i < PARTITION_CONTEXTS; i++) - adapt_probs(vp9_partition_tree, pre_fc->partition_prob[i], - counts->partition[i], fc->partition_prob[i]); + vp9_tree_merge_probs(vp9_partition_tree, pre_fc->partition_prob[i], + counts->partition[i], fc->partition_prob[i]); if (cm->interp_filter == SWITCHABLE) { for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) - adapt_probs(vp9_switchable_interp_tree, pre_fc->switchable_interp_prob[i], - counts->switchable_interp[i], fc->switchable_interp_prob[i]); + vp9_tree_merge_probs(vp9_switchable_interp_tree, + pre_fc->switchable_interp_prob[i], + counts->switchable_interp[i], + fc->switchable_interp_prob[i]); } if (cm->tx_mode == TX_MODE_SELECT) { @@ -399,23 +387,24 @@ void vp9_adapt_mode_probs(VP9_COMMON *cm) { for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], branch_ct_8x8p); for (j = 0; j < TX_SIZES - 3; ++j) - fc->tx_probs.p8x8[i][j] = adapt_prob(pre_fc->tx_probs.p8x8[i][j], - branch_ct_8x8p[j]); + fc->tx_probs.p8x8[i][j] = mode_mv_merge_probs( + pre_fc->tx_probs.p8x8[i][j], branch_ct_8x8p[j]); tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i], branch_ct_16x16p); for (j = 0; j < TX_SIZES - 2; ++j) - fc->tx_probs.p16x16[i][j] = adapt_prob(pre_fc->tx_probs.p16x16[i][j], - branch_ct_16x16p[j]); + fc->tx_probs.p16x16[i][j] = mode_mv_merge_probs( + pre_fc->tx_probs.p16x16[i][j], branch_ct_16x16p[j]); tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i], branch_ct_32x32p); for (j = 0; j < TX_SIZES - 1; ++j) - fc->tx_probs.p32x32[i][j] = adapt_prob(pre_fc->tx_probs.p32x32[i][j], - branch_ct_32x32p[j]); + fc->tx_probs.p32x32[i][j] = mode_mv_merge_probs( + pre_fc->tx_probs.p32x32[i][j], branch_ct_32x32p[j]); } } for (i = 0; i < SKIP_CONTEXTS; ++i) - fc->skip_probs[i] = adapt_prob(pre_fc->skip_probs[i], counts->skip[i]); + fc->skip_probs[i] = mode_mv_merge_probs( + pre_fc->skip_probs[i], counts->skip[i]); } static void set_default_lf_deltas(struct loopfilter *lf) { diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c index 922c03947..2477e6ef3 100644 --- a/vp9/common/vp9_entropymv.c +++ b/vp9/common/vp9_entropymv.c @@ -11,9 +11,6 @@ #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_entropymv.h" -#define MV_COUNT_SAT 20 -#define MV_MAX_UPDATE_FACTOR 128 - // Integer pel reference mv threshold for use of high-precision 1/8 mv #define COMPANDED_MVREF_THRESH 8 @@ -183,16 +180,6 @@ void vp9_inc_mv(const MV *mv, nmv_context_counts *counts) { } } -static vp9_prob adapt_prob(vp9_prob prep, const unsigned int ct[2]) { - return merge_probs(prep, ct, MV_COUNT_SAT, MV_MAX_UPDATE_FACTOR); -} - -static void adapt_probs(const vp9_tree_index *tree, const vp9_prob *pre_probs, - const unsigned int *counts, vp9_prob *probs) { - vp9_tree_merge_probs(tree, pre_probs, counts, MV_COUNT_SAT, - MV_MAX_UPDATE_FACTOR, probs); -} - void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) { int i, j; @@ -200,30 +187,32 @@ void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) { const nmv_context *pre_fc = &cm->frame_contexts[cm->frame_context_idx].nmvc; const nmv_context_counts *counts = &cm->counts.mv; - adapt_probs(vp9_mv_joint_tree, pre_fc->joints, counts->joints, fc->joints); + vp9_tree_merge_probs(vp9_mv_joint_tree, pre_fc->joints, counts->joints, + fc->joints); for (i = 0; i < 2; ++i) { nmv_component *comp = &fc->comps[i]; const nmv_component *pre_comp = &pre_fc->comps[i]; const nmv_component_counts *c = &counts->comps[i]; - comp->sign = adapt_prob(pre_comp->sign, c->sign); - adapt_probs(vp9_mv_class_tree, pre_comp->classes, c->classes, - comp->classes); - adapt_probs(vp9_mv_class0_tree, pre_comp->class0, c->class0, comp->class0); + comp->sign = mode_mv_merge_probs(pre_comp->sign, c->sign); + vp9_tree_merge_probs(vp9_mv_class_tree, pre_comp->classes, c->classes, + comp->classes); + vp9_tree_merge_probs(vp9_mv_class0_tree, pre_comp->class0, c->class0, + comp->class0); for (j = 0; j < MV_OFFSET_BITS; ++j) - comp->bits[j] = adapt_prob(pre_comp->bits[j], c->bits[j]); + comp->bits[j] = mode_mv_merge_probs(pre_comp->bits[j], c->bits[j]); for (j = 0; j < CLASS0_SIZE; ++j) - adapt_probs(vp9_mv_fp_tree, pre_comp->class0_fp[j], c->class0_fp[j], - comp->class0_fp[j]); + vp9_tree_merge_probs(vp9_mv_fp_tree, pre_comp->class0_fp[j], + c->class0_fp[j], comp->class0_fp[j]); - adapt_probs(vp9_mv_fp_tree, pre_comp->fp, c->fp, comp->fp); + vp9_tree_merge_probs(vp9_mv_fp_tree, pre_comp->fp, c->fp, comp->fp); if (allow_hp) { - comp->class0_hp = adapt_prob(pre_comp->class0_hp, c->class0_hp); - comp->hp = adapt_prob(pre_comp->hp, c->hp); + comp->class0_hp = mode_mv_merge_probs(pre_comp->class0_hp, c->class0_hp); + comp->hp = mode_mv_merge_probs(pre_comp->hp, c->hp); } } } diff --git a/vp9/common/vp9_prob.c b/vp9/common/vp9_prob.c index a1befc63e..3b7b9bf3b 100644 --- a/vp9/common/vp9_prob.c +++ b/vp9/common/vp9_prob.c @@ -29,33 +29,25 @@ const uint8_t vp9_norm[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - static unsigned int tree_merge_probs_impl(unsigned int i, const vp9_tree_index *tree, const vp9_prob *pre_probs, const unsigned int *counts, - unsigned int count_sat, - unsigned int max_update, vp9_prob *probs) { const int l = tree[i]; const unsigned int left_count = (l <= 0) ? counts[-l] - : tree_merge_probs_impl(l, tree, pre_probs, counts, - count_sat, max_update, probs); + : tree_merge_probs_impl(l, tree, pre_probs, counts, probs); const int r = tree[i + 1]; const unsigned int right_count = (r <= 0) ? counts[-r] - : tree_merge_probs_impl(r, tree, pre_probs, counts, - count_sat, max_update, probs); + : tree_merge_probs_impl(r, tree, pre_probs, counts, probs); const unsigned int ct[2] = { left_count, right_count }; - probs[i >> 1] = merge_probs(pre_probs[i >> 1], ct, - count_sat, max_update); + probs[i >> 1] = mode_mv_merge_probs(pre_probs[i >> 1], ct); return left_count + right_count; } void vp9_tree_merge_probs(const vp9_tree_index *tree, const vp9_prob *pre_probs, - const unsigned int *counts, unsigned int count_sat, - unsigned int max_update_factor, vp9_prob *probs) { - tree_merge_probs_impl(0, tree, pre_probs, counts, count_sat, - max_update_factor, probs); + const unsigned int *counts, vp9_prob *probs) { + tree_merge_probs_impl(0, tree, pre_probs, counts, probs); } diff --git a/vp9/common/vp9_prob.h b/vp9/common/vp9_prob.h index bc1511a5e..c69c62c81 100644 --- a/vp9/common/vp9_prob.h +++ b/vp9/common/vp9_prob.h @@ -33,6 +33,8 @@ typedef int8_t vp9_tree_index; #define vp9_complement(x) (255 - x) +#define MODE_MV_COUNT_SAT 20 + /* We build coding trees compactly in arrays. Each node of the tree is a pair of vp9_tree_indices. Array index often references a corresponding probability table. @@ -69,9 +71,28 @@ static INLINE vp9_prob merge_probs(vp9_prob pre_prob, return weighted_prob(pre_prob, prob, factor); } +// MODE_MV_MAX_UPDATE_FACTOR (128) * count / MODE_MV_COUNT_SAT; +static const int count_to_update_factor[MODE_MV_COUNT_SAT + 1] = { + 0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64, + 70, 76, 83, 89, 96, 102, 108, 115, 121, 128 +}; + +static INLINE vp9_prob mode_mv_merge_probs(vp9_prob pre_prob, + const unsigned int ct[2]) { + const unsigned int den = ct[0] + ct[1]; + if (den == 0) { + return pre_prob; + } else { + const unsigned int count = MIN(den, MODE_MV_COUNT_SAT); + const unsigned int factor = count_to_update_factor[count]; + const vp9_prob prob = + clip_prob(((int64_t)(ct[0]) * 256 + (den >> 1)) / den); + return weighted_prob(pre_prob, prob, factor); + } +} + void vp9_tree_merge_probs(const vp9_tree_index *tree, const vp9_prob *pre_probs, - const unsigned int *counts, unsigned int count_sat, - unsigned int max_update_factor, vp9_prob *probs); + const unsigned int *counts, vp9_prob *probs); DECLARE_ALIGNED(16, extern const uint8_t, vp9_norm[256]);