From: Alex Converse <aconverse@google.com>
Date: Tue, 16 Feb 2016 21:41:01 +0000 (-0800)
Subject: ANS: Switch from PDFs to CDFs.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6bbbe31656598365155e15a5bce940a05fd9d581;p=libvpx

ANS: Switch from PDFs to CDFs.

Make the RANS implementation operate on cumulative distribution
functions rather than individual probability distribution functions.
CDFs have shown themselves more flexible to work with.

Reduces decoding memory usage from scaling O(num_distributions *
symbol_resolution) to O(num_distributions).

No bitstream change. This is purely an implementation change.

Change-Id: I4e18d3a0a3d37a36a61487c3d778f9d088b0b374
---

diff --git a/test/vp10_ans_test.cc b/test/vp10_ans_test.cc
index 441583ad1..363161d7a 100644
--- a/test/vp10_ans_test.cc
+++ b/test/vp10_ans_test.cc
@@ -147,7 +147,7 @@ bool check_vpxbool(const PvVec &pv_vec, uint8_t *buf) {
 }
 
 const rans_sym rans_sym_tab[] = {
-    {70, 186}, {70, 116}, {100, 16}, {16, 0},
+    {16, 0}, {100, 16}, {70, 116}, {70, 186},
 };
 const int kDistinctSyms = sizeof(rans_sym_tab) / sizeof(rans_sym_tab[0]);
 
@@ -170,13 +170,9 @@ std::vector<int> ans_encode_build_vals(const rans_sym *tab, int iters) {
 
 void rans_build_dec_tab(const struct rans_sym sym_tab[],
                         rans_dec_lut dec_tab) {
-  int val = 0;
-  int i;
-  for (i = ans_p8_precision - 1; i >= 0; --i) {
-    dec_tab[i].val = val;
-    dec_tab[i].prob = sym_tab[val].prob;
-    dec_tab[i].cum_prob = sym_tab[val].cum_prob;
-    if (i == sym_tab[val].cum_prob) ++val;
+  dec_tab[0] = 0;
+  for (int i = 1; dec_tab[i - 1] < ans_p8_precision; ++i) {
+    dec_tab[i] = dec_tab[i - 1] + sym_tab[i - 1].prob;
   }
 }
 
diff --git a/vp10/common/ans.h b/vp10/common/ans.h
index a1862f30d..163a7a956 100644
--- a/vp10/common/ans.h
+++ b/vp10/common/ans.h
@@ -241,23 +241,16 @@ struct rans_dec_sym {
   AnsP8 cum_prob;  // not-inclusive
 };
 
-typedef struct rans_dec_sym rans_dec_lut[ans_p8_precision];
+// This is now just a boring cdf. It starts with an explicit zero.
+// TODO(aconverse): Remove starting zero.
+typedef uint16_t rans_dec_lut[16];
 
-static INLINE void rans_build_dec_tab(const AnsP8 token_probs[],
-                                      rans_dec_lut dec_tab) {
-  int val = 0;
-  int cum_prob = 0;
-  int sym_end = token_probs[0];
+static INLINE void rans_build_cdf_from_pdf(const AnsP8 token_probs[],
+                                           rans_dec_lut cdf_tab) {
   int i;
-  for (i = 0; i < 256; ++i) {
-    if (i == sym_end) {
-      ++val;
-      cum_prob = sym_end;
-      sym_end += token_probs[val];
-    }
-    dec_tab[i].val = val;
-    dec_tab[i].prob = token_probs[val];
-    dec_tab[i].cum_prob = cum_prob;
+  cdf_tab[0] = 0;
+  for (i = 1; cdf_tab[i - 1] < ans_p8_precision; ++i) {
+    cdf_tab[i] = cdf_tab[i - 1] + token_probs[i - 1];
   }
 }
 
@@ -275,20 +268,32 @@ static INLINE void rans_write(struct AnsCoder *ans,
       (ans->state / p) * ans_p8_precision + ans->state % p + sym->cum_prob;
 }
 
+static INLINE void fetch_sym(struct rans_dec_sym *out, const rans_dec_lut cdf,
+                             AnsP8 rem) {
+  int i = 0;
+  // TODO(skal): if critical, could be a binary search.
+  // Or, better, an O(1) alias-table.
+  while (rem >= cdf[i]) {
+    ++i;
+  }
+  out->val = i - 1;
+  out->prob = cdf[i] - cdf[i - 1];
+  out->cum_prob = cdf[i - 1];
+}
+
 static INLINE int rans_read(struct AnsDecoder *ans,
                             const rans_dec_lut tab) {
   unsigned rem;
   unsigned quo;
-  int val;
+  struct rans_dec_sym sym;
   if (ans->state < l_base && ans->buf_offset > 0) {
     ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
   }
   quo = ans->state / ans_p8_precision;
   rem = ans->state % ans_p8_precision;
-  val = tab[rem].val;
-
-  ans->state = quo * tab[rem].prob + rem - tab[rem].cum_prob;
-  return val;
+  fetch_sym(&sym, tab, rem);
+  ans->state = quo * sym.prob + rem - sym.cum_prob;
+  return sym.val;
 }
 
 static INLINE int ans_read_init(struct AnsDecoder *const ans,
diff --git a/vp10/common/entropy.c b/vp10/common/entropy.c
index 8c7e27a9b..df03224bf 100644
--- a/vp10/common/entropy.c
+++ b/vp10/common/entropy.c
@@ -676,12 +676,12 @@ const vpx_prob vp10_pareto8_token_probs[COEFF_PROB_MODELS]
   {247, 1, 1, 1, 1, 1, 1, 1, 1, 1},
 };
 
-void vp10_build_pareto8_dec_tab(
+void vp10_build_pareto8_cdf_tab(
     const vpx_prob token_probs[COEFF_PROB_MODELS][ENTROPY_TOKENS - 2],
-    rans_dec_lut dec_tab[COEFF_PROB_MODELS]) {
+    rans_dec_lut cdf_tab[COEFF_PROB_MODELS]) {
   int p;
   for (p = 0; p < COEFF_PROB_MODELS; ++p) {
-    rans_build_dec_tab(token_probs[p], dec_tab[p]);
+    rans_build_cdf_from_pdf(token_probs[p], cdf_tab[p]);
   }
 }
 #endif  // CONFIG_ANS
diff --git a/vp10/common/entropy.h b/vp10/common/entropy.h
index 4fa330e33..8ea01be4b 100644
--- a/vp10/common/entropy.h
+++ b/vp10/common/entropy.h
@@ -170,9 +170,9 @@ extern const vpx_prob vp10_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES];
 extern const vpx_prob
     vp10_pareto8_token_probs[COEFF_PROB_MODELS][ENTROPY_TOKENS - 2];
 
-void vp10_build_pareto8_dec_tab(
+void vp10_build_pareto8_cdf_tab(
     const vpx_prob token_probs[COEFF_PROB_MODELS][ENTROPY_TOKENS - 2],
-    rans_dec_lut dec_tab[COEFF_PROB_MODELS]);
+    rans_dec_lut cdf_tab[COEFF_PROB_MODELS]);
 #endif  // CONFIG_ANS
 
 typedef vpx_prob vp10_coeff_probs_model[REF_TYPES][COEF_BANDS]
diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h
index 02d7e1761..bcc69f3bd 100644
--- a/vp10/common/onyxc_int.h
+++ b/vp10/common/onyxc_int.h
@@ -324,6 +324,9 @@ typedef struct VP10Common {
   // - this is intentionally not placed in FRAME_CONTEXT since it's reset upon
   // each keyframe and not used afterwards
   vpx_prob kf_y_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1];
+#if CONFIG_ANS
+  rans_dec_lut token_tab[COEFF_PROB_MODELS];
+#endif  // CONFIG_ANS
 } VP10_COMMON;
 
 // TODO(hkuang): Don't need to lock the whole pool after implementing atomic
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index 2c2dc485b..af6016a68 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -1858,7 +1858,7 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd,
           for (col = 0; col < max_blocks_wide; col += step)
             predict_and_reconstruct_intra_block(xd,
 #if CONFIG_ANS
-                                                pbi->token_tab, tok,
+                                                cm->token_tab, tok,
 #else
                                                 r,
 #endif
@@ -1959,7 +1959,7 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd,
             for (col = 0; col < max_blocks_wide; col += step)
               eobtotal += reconstruct_inter_block(xd,
 #if CONFIG_ANS
-                                                  pbi->token_tab, tok,
+                                                  cm->token_tab, tok,
 #else
                                                   r,
 #endif
diff --git a/vp10/decoder/decoder.c b/vp10/decoder/decoder.c
index 13a590a05..35c53df6d 100644
--- a/vp10/decoder/decoder.c
+++ b/vp10/decoder/decoder.c
@@ -119,7 +119,7 @@ VP10Decoder *vp10_decoder_create(BufferPool *const pool) {
   vp10_loop_restoration_precal();
 #endif  // CONFIG_LOOP_RESTORATION
 #if CONFIG_ANS
-  vp10_build_pareto8_dec_tab(vp10_pareto8_token_probs, pbi->token_tab);
+  vp10_build_pareto8_cdf_tab(vp10_pareto8_token_probs, cm->token_tab);
 #endif  // CONFIG_ANS
 
   cm->error.setjmp = 0;
diff --git a/vp10/decoder/decoder.h b/vp10/decoder/decoder.h
index e590d8be1..a69d05f07 100644
--- a/vp10/decoder/decoder.h
+++ b/vp10/decoder/decoder.h
@@ -89,9 +89,6 @@ typedef struct VP10Decoder {
   int inv_tile_order;
   int need_resync;  // wait for key/intra-only frame.
   int hold_ref_buf;  // hold the reference buffer.
-#if CONFIG_ANS
-  rans_dec_lut token_tab[COEFF_PROB_MODELS];
-#endif  // CONFIG_ANS
 } VP10Decoder;
 
 int vp10_receive_compressed_data(struct VP10Decoder *pbi,
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index fa6c2cbe7..2603b6b48 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -634,6 +634,7 @@ static void pack_mb_tokens(vpx_writer *w,
 // This function serializes the tokens backwards both in token order and
 // bit order in each token.
 static void pack_mb_tokens_ans(struct AnsCoder *const ans,
+                               rans_dec_lut token_tab[COEFF_PROB_MODELS],
                                const TOKENEXTRA *const start,
                                const TOKENEXTRA *const stop,
                                vpx_bit_depth_t bit_depth) {
@@ -676,14 +677,10 @@ static void pack_mb_tokens_ans(struct AnsCoder *const ans,
 
       {
         struct rans_sym s;
-        int j;
-        const vpx_prob *token_probs =
-            vp10_pareto8_token_probs[p->context_tree[PIVOT_NODE] - 1];
-        s.cum_prob = 0;
-        for (j = ONE_TOKEN; j < t; ++j) {
-          s.cum_prob += token_probs[j - ONE_TOKEN];
-        }
-        s.prob = token_probs[t - ONE_TOKEN];
+        const rans_dec_lut *token_cdf =
+            &token_tab[p->context_tree[PIVOT_NODE] - 1];
+        s.cum_prob = (*token_cdf)[t - ONE_TOKEN];
+        s.prob = (*token_cdf)[t - ONE_TOKEN + 1] - s.cum_prob;
         rans_write(ans, &s);
       }
     }
@@ -2200,7 +2197,8 @@ static size_t encode_tiles(VP10_COMP *cpi, uint8_t *data_ptr,
                   NULL, NULL);
       vpx_stop_encode(&mode_bc);
       ans_write_init(&token_ans, mode_data_start + mode_bc.pos);
-      pack_mb_tokens_ans(&token_ans, tok, tok_end, cm->bit_depth);
+      pack_mb_tokens_ans(&token_ans, cm->token_tab, tok, tok_end,
+                         cm->bit_depth);
       token_section_size = ans_write_end(&token_ans);
       if (put_tile_size) {
         // size of this tile
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index fc9e2e924..ac8d2770c 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -1986,6 +1986,9 @@ VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf,
 #if CONFIG_LOOP_RESTORATION
   vp10_loop_restoration_precal();
 #endif  // CONFIG_LOOP_RESTORATION
+#if CONFIG_ANS
+  vp10_build_pareto8_cdf_tab(vp10_pareto8_token_probs, cm->token_tab);
+#endif  // CONFIG_ANS
 
   cm->error.setjmp = 0;