From: Alex Converse <aconverse@google.com>
Date: Wed, 16 Dec 2015 19:16:32 +0000 (-0800)
Subject: Add an implementation of Asymmetric Numeral Systems (ANS).
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9ffcb469fb9748c7a5f9ececd82ee5a3c5c1c990;p=libvpx

Add an implementation of Asymmetric Numeral Systems (ANS).

Change-Id: Ie41bc72127e700887566dcc951da9d83a0b94891
---

diff --git a/configure b/configure
index 29b1da8c6..f94546e7e 100755
--- a/configure
+++ b/configure
@@ -281,6 +281,7 @@ EXPERIMENT_LIST="
     ext_interp
     ext_refs
     supertx
+    ans
 "
 CONFIG_LIST="
     dependency_tracking
diff --git a/test/test.mk b/test/test.mk
index 471f87021..81f20fb76 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -165,6 +165,7 @@ ifeq ($(CONFIG_VP10),yes)
 
 LIBVPX_TEST_SRCS-yes                    += vp10_inv_txfm_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_dct_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_ANS)          += vp10_ans_test.cc
 
 endif # VP10
 
diff --git a/test/vp10_ans_test.cc b/test/vp10_ans_test.cc
new file mode 100644
index 000000000..441583ad1
--- /dev/null
+++ b/test/vp10_ans_test.cc
@@ -0,0 +1,337 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+#include <ctime>
+#include <utility>
+#include <vector>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "vp10/common/ans.h"
+#include "vp10/encoder/treewriter.h"
+#include "vpx_dsp/bitreader.h"
+#include "vpx_dsp/bitwriter.h"
+
+namespace {
+typedef std::vector<std::pair<uint8_t, bool> > PvVec;
+
+PvVec abs_encode_build_vals(int iters) {  // builds iters (prob, bit) pairs
+  PvVec ret;
+  libvpx_test::ACMRandom gen(0x30317076);  // constant argument, presumably seed
+  double entropy = 0;  // running sum of per-pair binary entropy, in bits
+  for (int i = 0; i < iters; ++i) {
+    uint8_t p;
+    do {
+      p = gen.Rand8();
+    } while (p == 0);  // zero is not a valid coding probability
+    bool b = gen.Rand8() < p;  // true when a second draw falls below p
+    ret.push_back(std::make_pair(static_cast<uint8_t>(p), b));
+    double d = p / 256.;  // p as a fraction of 256
+    entropy += -d * log2(d) - (1 - d) * log2(1 - d);
+  }
+  printf("entropy %f\n", entropy);
+  return ret;
+}
+
+bool check_rabs(const PvVec &pv_vec, uint8_t *buf) {  // rABS round trip
+  AnsCoder a;
+  ans_write_init(&a, buf);
+
+  std::clock_t start = std::clock();
+  for (PvVec::const_reverse_iterator it = pv_vec.rbegin(); it != pv_vec.rend();
+       ++it) {  // written back to front; the read loop below runs forward
+    rabs_write(&a, it->second, 256 - it->first);
+  }
+  std::clock_t enc_time = std::clock() - start;
+  int offset = ans_write_end(&a);  // byte count including the 3 state bytes
+  bool okay = true;
+  AnsDecoder d;
+  if (ans_read_init(&d, buf, offset)) return false;  // nonzero means failure
+  start = std::clock();
+  for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) {
+    okay &= rabs_read(&d, 256 - it->first) == it->second;
+  }
+  std::clock_t dec_time = std::clock() - start;
+  if (!okay) return false;  // at least one decoded value differed
+  printf("rABS size %d enc_time %f dec_time %f\n", offset,
+         static_cast<float>(enc_time) / CLOCKS_PER_SEC,
+         static_cast<float>(dec_time) / CLOCKS_PER_SEC);
+  return ans_read_end(&d);  // true only if the state is back at l_base
+}
+
+bool check_rabs_asc(const PvVec &pv_vec, uint8_t *buf) {  // rABS (asc) trip
+  AnsCoder a;
+  ans_write_init(&a, buf);
+
+  std::clock_t start = std::clock();
+  for (PvVec::const_reverse_iterator it = pv_vec.rbegin(); it != pv_vec.rend();
+       ++it) {  // written back to front; the read loop below runs forward
+    rabs_asc_write(&a, it->second, 256 - it->first);
+  }
+  std::clock_t enc_time = std::clock() - start;
+  int offset = ans_write_end(&a);  // byte count including the 3 state bytes
+  bool okay = true;
+  AnsDecoder d;
+  if (ans_read_init(&d, buf, offset)) return false;  // nonzero means failure
+  start = std::clock();
+  for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) {
+    okay &= rabs_asc_read(&d, 256 - it->first) == it->second;
+  }
+  std::clock_t dec_time = std::clock() - start;
+  if (!okay) return false;  // at least one decoded value differed
+  printf("rABS (asc) size %d enc_time %f dec_time %f\n", offset,
+         static_cast<float>(enc_time) / CLOCKS_PER_SEC,
+         static_cast<float>(dec_time) / CLOCKS_PER_SEC);
+  return ans_read_end(&d);  // true only if the state is back at l_base
+}
+
+bool check_uabs(const PvVec &pv_vec, uint8_t *buf) {  // uABS round trip
+  AnsCoder a;
+  ans_write_init(&a, buf);
+
+  std::clock_t start = std::clock();
+  for (PvVec::const_reverse_iterator it = pv_vec.rbegin(); it != pv_vec.rend();
+       ++it) {  // written back to front; the read loop below runs forward
+    uabs_write(&a, it->second, 256 - it->first);
+  }
+  std::clock_t enc_time = std::clock() - start;
+  int offset = ans_write_end(&a);  // byte count including the 3 state bytes
+  bool okay = true;
+  AnsDecoder d;
+  if (ans_read_init(&d, buf, offset)) return false;  // nonzero means failure
+  start = std::clock();
+  for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) {
+    okay &= uabs_read(&d, 256 - it->first) == it->second;
+  }
+  std::clock_t dec_time = std::clock() - start;
+  if (!okay) return false;  // at least one decoded value differed
+  printf("uABS size %d enc_time %f dec_time %f\n", offset,
+         static_cast<float>(enc_time) / CLOCKS_PER_SEC,
+         static_cast<float>(dec_time) / CLOCKS_PER_SEC);
+  return ans_read_end(&d);  // true only if the state is back at l_base
+}
+
+bool check_vpxbool(const PvVec &pv_vec, uint8_t *buf) {  // vpx bool round trip
+  vpx_writer w;
+  vpx_reader r;
+  vpx_start_encode(&w, buf);
+
+  std::clock_t start = std::clock();
+  for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) {
+    vpx_write(&w, it->second, 256 - it->first);  // forward order, unlike ANS
+  }
+  std::clock_t enc_time = std::clock() - start;
+  vpx_stop_encode(&w);
+  bool okay = true;  // cleared if any decoded value differs from its input
+  vpx_reader_init(&r, buf, w.pos, NULL, NULL);
+  start = std::clock();
+  for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) {
+    okay &= vpx_read(&r, 256 - it->first) == it->second;
+  }
+  std::clock_t dec_time = std::clock() - start;
+  printf("VPX size %u enc_time %f dec_time %f\n", w.pos,  // %u as in VPXtree
+         static_cast<float>(enc_time) / CLOCKS_PER_SEC,
+         static_cast<float>(dec_time) / CLOCKS_PER_SEC);
+  return okay;
+}
+
+const rans_sym rans_sym_tab[] = {  // {prob, cum_prob}; the probs sum to 256
+    {70, 186}, {70, 116}, {100, 16}, {16, 0},
+};
+const int kDistinctSyms = sizeof(rans_sym_tab) / sizeof(rans_sym_tab[0]);
+
+std::vector<int> ans_encode_build_vals(const rans_sym *tab, int iters) {
+  std::vector<int> p_to_sym;  // maps an 8-bit draw to a symbol index
+  int i = 0;
+  while (p_to_sym.size() < 256) {
+    p_to_sym.insert(p_to_sym.end(), tab[i].prob, i);  // prob copies of i
+    ++i;
+  }
+  assert(p_to_sym.size() == 256);  // the probs must sum to exactly 256
+  std::vector<int> ret;
+  libvpx_test::ACMRandom gen(18543637);
+  for (int i = 0; i < iters; ++i) {  // this i shadows the one declared above
+    int sym = p_to_sym[gen.Rand8()];
+    ret.push_back(sym);
+  }
+  return ret;
+}
+
+void rans_build_dec_tab(const struct rans_sym sym_tab[],
+                        rans_dec_lut dec_tab) {
+  int val = 0;  // symbol that owns the slot being filled
+  int i;
+  for (i = ans_p8_precision - 1; i >= 0; --i) {  // fill from the top slot down
+    dec_tab[i].val = val;
+    dec_tab[i].prob = sym_tab[val].prob;
+    dec_tab[i].cum_prob = sym_tab[val].cum_prob;
+    if (i == sym_tab[val].cum_prob) ++val;  // reached the start of val's range
+  }
+}
+
+bool check_rans(const std::vector<int> &sym_vec, const rans_sym *const tab,
+                uint8_t *buf) {  // rANS round trip over sym_vec
+  AnsCoder a;
+  ans_write_init(&a, buf);
+  rans_dec_lut dec_tab;
+  rans_build_dec_tab(tab, dec_tab);  // decoder lookup derived from tab
+
+  std::clock_t start = std::clock();
+  for (std::vector<int>::const_reverse_iterator it = sym_vec.rbegin();
+       it != sym_vec.rend(); ++it) {  // written back to front
+    rans_write(&a, &tab[*it]);
+  }
+  std::clock_t enc_time = std::clock() - start;
+  int offset = ans_write_end(&a);  // byte count including the 3 state bytes
+  bool okay = true;
+  AnsDecoder d;
+  if (ans_read_init(&d, buf, offset)) return false;  // nonzero means failure
+  start = std::clock();
+  for (std::vector<int>::const_iterator it = sym_vec.begin();
+       it != sym_vec.end(); ++it) {  // read front to back
+    okay &= rans_read(&d, dec_tab) == *it;
+  }
+  std::clock_t dec_time = std::clock() - start;
+  if (!okay) return false;  // at least one decoded symbol differed
+  printf("rANS size %d enc_time %f dec_time %f\n", offset,
+         static_cast<float>(enc_time) / CLOCKS_PER_SEC,
+         static_cast<float>(dec_time) / CLOCKS_PER_SEC);
+  return ans_read_end(&d);  // true only if the state is back at l_base
+}
+
+void build_tree(vpx_tree_index *tree, int num_syms) {
+  vpx_tree_index i;
+  int sym = 0;  // leaf values are 0, -1, -2, ...
+  for (i = 0; i < num_syms - 1; ++i) {
+    tree[2 * i] = sym--;  // even slot: leaf
+    tree[2 * i + 1] = 2 * (i + 1);  // odd slot: index of the next pair
+  }
+  tree[2 * i - 1] = sym;  // overwrite the last link with the final leaf
+}
+
+// treep are the probabilities of tree nodes like:
+//          *
+//         / \
+//    -sym0  *
+//          / \
+//     -sym1  *
+//           / \
+//      -sym2  -sym3
+void tab2tree(const rans_sym *tab, int tab_size, vpx_prob *treep) {
+  const unsigned basep = 256;
+  unsigned pleft = basep;  // probability not yet claimed by earlier symbols
+  for (int i = 0; i < tab_size - 1; ++i) {
+    unsigned prob = (tab[i].prob * basep + (basep / 2)) / pleft;
+    assert(prob > 0 && prob < 256);
+    treep[i] = prob;  // tab[i].prob rescaled against pleft, out of 256
+    pleft -= tab[i].prob;
+  }
+}
+
+struct sym_bools {  // bit pattern that check_vpxtree passes to vp10_write_tree
+  unsigned bits;  // the pattern itself
+  int len;  // number of bits in the pattern
+};
+
+static void make_tree_bits_tab(sym_bools *tab, int num_syms) {
+  unsigned bits = 0;
+  int len = 0;
+  int i;
+  for (i = 0; i < num_syms - 1; ++i) {
+    bits *= 2;  // append a 0 bit
+    ++len;
+    tab[i].bits = bits;
+    tab[i].len = len;
+    ++bits;  // flip that bit to 1 for the symbols that follow
+  }
+  tab[i].bits = bits;  // last symbol reuses the previous length
+  tab[i].len = len;
+}
+
+void build_tpb(vpx_prob probs[/*num_syms*/],
+               vpx_tree_index tree[/*2*num_syms*/],
+               sym_bools bit_len[/*num_syms*/],
+               const rans_sym sym_tab[/*num_syms*/], int num_syms) {
+  tab2tree(sym_tab, num_syms, probs);  // fills probs
+  build_tree(tree, num_syms);  // fills tree
+  make_tree_bits_tab(bit_len, num_syms);  // fills bit_len
+}
+
+bool check_vpxtree(const std::vector<int> &sym_vec, const rans_sym *sym_tab,
+                   uint8_t *buf) {  // tree-coder round trip over sym_vec
+  vpx_writer w;
+  vpx_reader r;
+  vpx_start_encode(&w, buf);
+
+  vpx_prob probs[kDistinctSyms];
+  vpx_tree_index tree[2 * kDistinctSyms];
+  sym_bools bit_len[kDistinctSyms];
+  build_tpb(probs, tree, bit_len, sym_tab, kDistinctSyms);
+
+  std::clock_t start = std::clock();
+  for (std::vector<int>::const_iterator it = sym_vec.begin();
+       it != sym_vec.end(); ++it) {  // forward order, unlike the ANS writers
+    vp10_write_tree(&w, tree, probs, bit_len[*it].bits, bit_len[*it].len, 0);
+  }
+  std::clock_t enc_time = std::clock() - start;
+  vpx_stop_encode(&w);
+  vpx_reader_init(&r, buf, w.pos, NULL, NULL);
+  start = std::clock();
+  for (std::vector<int>::const_iterator it = sym_vec.begin();
+       it != sym_vec.end(); ++it) {
+    if (vpx_read_tree(&r, tree, probs) != *it) return false;  // first mismatch
+  }
+  std::clock_t dec_time = std::clock() - start;
+  printf("VPXtree size %u enc_time %f dec_time %f\n", w.pos,
+         static_cast<float>(enc_time) / CLOCKS_PER_SEC,
+         static_cast<float>(dec_time) / CLOCKS_PER_SEC);
+  return true;
+}
+
+class Vp10AbsTest : public ::testing::Test {  // fixture for the binary coders
+ protected:
+  static void SetUpTestCase() { pv_vec_ = abs_encode_build_vals(kNumBools); }
+  virtual void SetUp() { buf_ = new uint8_t[kNumBools / 8]; }
+  virtual void TearDown() { delete[] buf_; }
+  static const int kNumBools = 100000000;  // number of (prob, bit) pairs
+  static PvVec pv_vec_;  // shared input, filled in SetUpTestCase
+  uint8_t *buf_;  // output buffer, allocated in SetUp and freed in TearDown
+};
+PvVec Vp10AbsTest::pv_vec_;
+
+class Vp10AnsTest : public ::testing::Test {  // fixture for the symbol coders
+ protected:
+  static void SetUpTestCase() {
+    sym_vec_ = ans_encode_build_vals(rans_sym_tab, kNumSyms);
+  }
+  virtual void SetUp() { buf_ = new uint8_t[kNumSyms / 2]; }
+  virtual void TearDown() { delete[] buf_; }
+  static const int kNumSyms = 25000000;  // number of symbols generated
+  static std::vector<int> sym_vec_;  // shared input, filled in SetUpTestCase
+  uint8_t *buf_;  // output buffer, allocated in SetUp and freed in TearDown
+};
+std::vector<int> Vp10AnsTest::sym_vec_;
+
+TEST_F(Vp10AbsTest, Vpxbool) { EXPECT_TRUE(check_vpxbool(pv_vec_, buf_)); }
+TEST_F(Vp10AbsTest, Rabs) { EXPECT_TRUE(check_rabs(pv_vec_, buf_)); }
+TEST_F(Vp10AbsTest, RabsAsc) { EXPECT_TRUE(check_rabs_asc(pv_vec_, buf_)); }
+TEST_F(Vp10AbsTest, Uabs) { EXPECT_TRUE(check_uabs(pv_vec_, buf_)); }
+
+TEST_F(Vp10AnsTest, Rans) {
+  EXPECT_TRUE(check_rans(sym_vec_, rans_sym_tab, buf_));
+}
+TEST_F(Vp10AnsTest, Vpxtree) {
+  EXPECT_TRUE(check_vpxtree(sym_vec_, rans_sym_tab, buf_));
+}
+}  // namespace
diff --git a/vp10/common/ans.h b/vp10/common/ans.h
new file mode 100644
index 000000000..fbc9a45ca
--- /dev/null
+++ b/vp10/common/ans.h
@@ -0,0 +1,299 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_ANS_H_
+#define VP10_COMMON_ANS_H_
+// An implementation of Asymmetric Numeral Systems
+// http://arxiv.org/abs/1311.2540v2
+
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem_ops.h"
+
+#define ANS_DIVIDE_BY_MULTIPLY 1  // 1: divide via fastdiv(); 0: use / and %
+#if ANS_DIVIDE_BY_MULTIPLY
+#include "vp10/common/divide.h"
+#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
+  do { \
+    quotient = fastdiv(dividend, divisor); \
+    remainder = dividend - quotient * divisor; \
+  } while (0)
+#define ANS_DIV(dividend, divisor) \
+  fastdiv(dividend, divisor)
+#else
+#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
+  do { \
+    quotient = dividend / divisor; \
+    remainder = dividend % divisor; \
+  } while (0)
+#define ANS_DIV(dividend, divisor) \
+    ((dividend) / (divisor))
+#endif  // ANS_DIVIDE_BY_MULTIPLY
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+struct AnsCoder {  // encoder context
+  uint8_t *buf;  // output bytes
+  int buf_offset;  // index of the next byte to write
+  uint32_t state;  // current ANS state
+};
+
+struct AnsDecoder {  // decoder context
+  const uint8_t *buf;  // input bytes
+  int buf_offset;  // one past the next byte to read; readers pre-decrement it
+  uint32_t state;  // current ANS state
+};
+
+typedef uint8_t AnsP8;  // probability expressed in units of 1/256
+#define ans_p8_precision 256u
+#define ans_p8_shift 8
+#define l_base (ans_p8_precision * 4)  // l_base % precision must be 0
+#define io_base 256  // the coders move one byte at a time
+// Range I = { l_base, l_base + 1, ..., l_base * io_base - 1 }
+
+static INLINE void ans_write_init(struct AnsCoder *const ans,
+                                  uint8_t *const buf) {
+  ans->buf = buf;
+  ans->buf_offset = 0;  // nothing written yet
+  ans->state = l_base;  // ans_read_end() expects the decoder to end here
+}
+
+static INLINE int ans_write_end(struct AnsCoder *const ans) {
+  mem_put_le24(ans->buf + ans->buf_offset, ans->state);  // append the state
+  return ans->buf_offset + 3;  // total bytes, including the 3 state bytes
+}
+
+// rABS with descending spread
+// p or p0 takes the place of l_s from the paper
+// ans_p8_precision is m
+static INLINE void rabs_desc_write(struct AnsCoder *ans, int val, AnsP8 p0) {
+  const AnsP8 p = ans_p8_precision - p0;  // 256 - p0, stored in 8 bits
+  const unsigned l_s = val ? p : p0;  // share belonging to the coded value
+  unsigned quot, rem;
+  if (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
+    ans->buf[ans->buf_offset++] = ans->state % io_base;  // emit the low byte
+    ans->state /= io_base;
+  }
+  ANS_DIVREM(quot, rem, ans->state, l_s);
+  ans->state = quot * ans_p8_precision + rem + (val ? 0 : p);  // 1s use [0, p)
+}
+
+#define ANS_IMPL1 0
+#define UNPREDICTABLE(x) x
+static INLINE int rabs_desc_read(struct AnsDecoder *ans, AnsP8 p0) {
+  int val;  // decoded value, returned to the caller
+#if ANS_IMPL1
+  unsigned l_s;
+#else
+  unsigned quot, rem, x, xn;
+#endif
+  const AnsP8 p = ans_p8_precision - p0;  // 256 - p0, stored in 8 bits
+  if (ans->state < l_base && ans->buf_offset > 0) {  // never read before buf
+    ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
+  }
+#if ANS_IMPL1
+  val = ans->state % ans_p8_precision < p;
+  l_s = val ? p : p0;
+  ans->state = (ans->state / ans_p8_precision) * l_s +
+               ans->state % ans_p8_precision - (!val * p);
+#else
+  x = ans->state;
+  quot = x / ans_p8_precision;
+  rem = x % ans_p8_precision;
+  xn = quot * p;
+  val = rem < p;  // remainders in [0, p) were produced by writing a 1
+  if (UNPREDICTABLE(val)) {
+    ans->state = xn + rem;
+  } else {
+    // ans->state = quot * p0 + rem - p;
+    ans->state = x - xn - p;
+  }
+#endif
+  return val;
+}
+
+// rABS with ascending spread
+// p or p0 takes the place of l_s from the paper
+// ans_p8_precision is m
+static INLINE void rabs_asc_write(struct AnsCoder *ans, int val, AnsP8 p0) {
+  const AnsP8 p = ans_p8_precision - p0;  // 256 - p0, stored in 8 bits
+  const unsigned l_s = val ? p : p0;  // share belonging to the coded value
+  unsigned quot, rem;
+  if (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
+    ans->buf[ans->buf_offset++] = ans->state % io_base;  // emit the low byte
+    ans->state /= io_base;
+  }
+  ANS_DIVREM(quot, rem, ans->state, l_s);
+  ans->state = quot * ans_p8_precision + rem + (val ? p0 : 0);  // 1s use >= p0
+}
+
+static INLINE int rabs_asc_read(struct AnsDecoder *ans, AnsP8 p0) {
+  int val;  // decoded value, returned to the caller
+#if ANS_IMPL1
+  unsigned l_s;
+#else
+  unsigned quot, rem, x, xn;
+#endif
+  const AnsP8 p = ans_p8_precision - p0;  // 256 - p0, stored in 8 bits
+  if (ans->state < l_base && ans->buf_offset > 0) {  // never read before buf
+    ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
+  }
+#if ANS_IMPL1
+  val = ans->state % ans_p8_precision >= p0;  // inverse of rabs_asc_write
+  l_s = val ? p : p0;
+  ans->state = (ans->state / ans_p8_precision) * l_s +
+               ans->state % ans_p8_precision - (val * p0);
+#else
+  x = ans->state;
+  quot = x / ans_p8_precision;
+  rem = x % ans_p8_precision;
+  xn = quot * p;
+  val = rem >= p0;  // remainders in [p0, 256) were produced by writing a 1
+  if (UNPREDICTABLE(val)) {
+    ans->state = xn + rem - p0;
+  } else {
+    // ans->state = quot * p0 + rem;
+    ans->state = x - xn;
+  }
+#endif
+  return val;
+}
+
+#define rabs_read rabs_desc_read
+#define rabs_write rabs_desc_write
+
+// uABS with normalization
+static INLINE void uabs_write(struct AnsCoder *ans, int val, AnsP8 p0) {
+  AnsP8 p = ans_p8_precision - p0;  // 256 - p0, stored in 8 bits
+  const unsigned l_s = val ? p : p0;  // share belonging to the coded value
+  if (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
+    ans->buf[ans->buf_offset++] = ans->state % io_base;  // emit the low byte
+    ans->state /= io_base;
+  }
+  if (!val)
+    ans->state = ANS_DIV(ans->state * ans_p8_precision, p0);
+  else
+    ans->state = ANS_DIV((ans->state + 1) * ans_p8_precision + p - 1, p) - 1;
+}
+
+static INLINE int uabs_read(struct AnsDecoder *ans, AnsP8 p0) {
+  AnsP8 p = ans_p8_precision - p0;  // 256 - p0, stored in 8 bits
+  int s;  // decoded value, returned to the caller
+  // unsigned int xp1;
+  unsigned xp, sp;
+  unsigned state = ans->state;
+  if (state < l_base && ans->buf_offset > 0) {  // refill, never before buf[0]
+    state = state * io_base + ans->buf[--ans->buf_offset];
+  }
+  sp = state * p;
+  // xp1 = (sp + p) / ans_p8_precision;
+  xp = sp / ans_p8_precision;
+  // s = xp1 - xp;
+  s = (sp & 0xFF) >= p0;  // decided from the low 8 bits of state * p
+  if (UNPREDICTABLE(s))
+    ans->state = xp;
+  else
+    ans->state = state - xp;
+  return s;
+}
+
+static INLINE int uabs_read_bit(struct AnsDecoder *ans) {
+  int s;  // decoded bit, returned to the caller
+  unsigned state = ans->state;
+  if (state < l_base && ans->buf_offset > 0) {  // refill, never before buf[0]
+    state = state * io_base + ans->buf[--ans->buf_offset];
+  }
+  s = (int)(state & 1);  // the bit is the low bit of the state
+  ans->state = state >> 1;
+  return s;
+}
+
+struct rans_sym {  // encoder-side description of one symbol for rans_write()
+  AnsP8 prob;  // used by rans_write() as the divisor l_s
+  AnsP8 cum_prob;  // not-inclusive
+};
+
+struct rans_dec_sym {  // one decoder lookup entry, indexed by state % 256
+  uint8_t val;  // symbol returned by rans_read() for this slot
+  AnsP8 prob;  // multiplier applied to the quotient in rans_read()
+  AnsP8 cum_prob;  // not-inclusive
+};
+
+typedef struct rans_dec_sym rans_dec_lut[ans_p8_precision];
+
+static INLINE void rans_build_dec_tab(const AnsP8 token_probs[],
+                                      rans_dec_lut dec_tab) {
+  int val = 0;  // symbol that owns the slot being filled
+  int cum_prob = 0;  // first slot of the current symbol
+  int sym_end = token_probs[0];  // one past the current symbol's last slot
+  int i;
+  for (i = 0; i < 256; ++i) {
+    if (i == sym_end) {  // advance to the next symbol
+      ++val;
+      cum_prob = sym_end;
+      sym_end += token_probs[val];
+    }
+    dec_tab[i].val = val;
+    dec_tab[i].prob = token_probs[val];
+    dec_tab[i].cum_prob = cum_prob;
+  }
+}
+
+// rANS with normalization
+// sym->prob takes the place of l_s from the paper
+// ans_p8_precision is m
+static INLINE void rans_write(struct AnsCoder *ans,
+                              const struct rans_sym *const sym) {
+  const AnsP8 p = sym->prob;
+  if (ans->state >= l_base / ans_p8_precision * io_base * p) {
+    ans->buf[ans->buf_offset++] = ans->state % io_base;  // emit the low byte
+    ans->state /= io_base;
+  }
+  ans->state =
+      (ans->state / p) * ans_p8_precision + ans->state % p + sym->cum_prob;
+}
+
+static INLINE int rans_read(struct AnsDecoder *ans,
+                            const rans_dec_lut tab) {
+  unsigned rem;
+  unsigned quo;
+  int val;  // decoded symbol, returned to the caller
+  if (ans->state < l_base && ans->buf_offset > 0) {  // refill, never before buf
+    ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
+  }
+  quo = ans->state / ans_p8_precision;
+  rem = ans->state % ans_p8_precision;  // slot index into tab
+  val = tab[rem].val;
+
+  ans->state = quo * tab[rem].prob + rem - tab[rem].cum_prob;
+  return val;
+}
+
+static INLINE int ans_read_init(struct AnsDecoder *const ans,
+                                const uint8_t *const buf,
+                                int offset) {
+  if (offset < 3)  // too short to hold the 3 state bytes
+    return 1;
+  ans->buf = buf;
+  ans->buf_offset = offset - 3;  // payload ends where the state bytes begin
+  ans->state = mem_get_le24(buf + offset - 3);
+  return 0;  // 0 on success, 1 on failure
+}
+
+static INLINE int ans_read_end(struct AnsDecoder *const ans) {
+  return ans->state == l_base;  // l_base is the state ans_write_init() sets
+}
+#undef ANS_DIVREM
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+#endif  // VP10_COMMON_ANS_H_
diff --git a/vp10/common/divide.c b/vp10/common/divide.c
new file mode 100644
index 000000000..00b43a0f0
--- /dev/null
+++ b/vp10/common/divide.c
@@ -0,0 +1,93 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp10/common/divide.h"
+
+/* Constants for divide by multiply for small divisors generated with:
+void init_fastdiv() {
+  int i;
+  for (i = 3; i < 256; ++i) {
+    const int s = 31 ^ __builtin_clz(2 * i + 1);
+    const unsigned long long base = (1ull << (sizeof(unsigned) * 8 + s)) - 1;
+    fastdiv_tab[i].mult = (base / i + 1) & 0xFFFFFFFF;
+    fastdiv_tab[i].shift = s;
+  }
+  for (i = 0; i < 8; ++i) {
+    fastdiv_tab[1 << i].mult = 0;
+    fastdiv_tab[1 << i].shift = i;
+  }
+}
+*/
+const struct fastdiv_elem vp10_fastdiv_tab[256] = {
+    {0, 0},          {0, 0},          {0, 1},          {1431655766, 2},
+    {0, 2},          {2576980378, 3}, {1431655766, 3}, {613566757, 3},
+    {0, 3},          {3340530120, 4}, {2576980378, 4}, {1952257862, 4},
+    {1431655766, 4}, {991146300, 4},  {613566757, 4},  {286331154, 4},
+    {0, 4},          {3789677026, 5}, {3340530120, 5}, {2938661835, 5},
+    {2576980378, 5}, {2249744775, 5}, {1952257862, 5}, {1680639377, 5},
+    {1431655766, 5}, {1202590843, 5}, {991146300, 5},  {795364315, 5},
+    {613566757, 5},  {444306962, 5},  {286331154, 5},  {138547333, 5},
+    {0, 5},          {4034666248, 6}, {3789677026, 6}, {3558687189, 6},
+    {3340530120, 6}, {3134165325, 6}, {2938661835, 6}, {2753184165, 6},
+    {2576980378, 6}, {2409371898, 6}, {2249744775, 6}, {2097542168, 6},
+    {1952257862, 6}, {1813430637, 6}, {1680639377, 6}, {1553498810, 6},
+    {1431655766, 6}, {1314785907, 6}, {1202590843, 6}, {1094795586, 6},
+    {991146300, 6},  {891408307, 6},  {795364315, 6},  {702812831, 6},
+    {613566757, 6},  {527452125, 6},  {444306962, 6},  {363980280, 6},
+    {286331154, 6},  {211227900, 6},  {138547333, 6},  {68174085, 6},
+    {0, 6},          {4162814457, 7}, {4034666248, 7}, {3910343360, 7},
+    {3789677026, 7}, {3672508268, 7}, {3558687189, 7}, {3448072337, 7},
+    {3340530120, 7}, {3235934265, 7}, {3134165325, 7}, {3035110223, 7},
+    {2938661835, 7}, {2844718599, 7}, {2753184165, 7}, {2663967058, 7},
+    {2576980378, 7}, {2492141518, 7}, {2409371898, 7}, {2328596727, 7},
+    {2249744775, 7}, {2172748162, 7}, {2097542168, 7}, {2024065048, 7},
+    {1952257862, 7}, {1882064321, 7}, {1813430637, 7}, {1746305385, 7},
+    {1680639377, 7}, {1616385542, 7}, {1553498810, 7}, {1491936009, 7},
+    {1431655766, 7}, {1372618415, 7}, {1314785907, 7}, {1258121734, 7},
+    {1202590843, 7}, {1148159575, 7}, {1094795586, 7}, {1042467791, 7},
+    {991146300, 7},  {940802361, 7},  {891408307, 7},  {842937507, 7},
+    {795364315, 7},  {748664025, 7},  {702812831, 7},  {657787785, 7},
+    {613566757, 7},  {570128403, 7},  {527452125, 7},  {485518043, 7},
+    {444306962, 7},  {403800345, 7},  {363980280, 7},  {324829460, 7},
+    {286331154, 7},  {248469183, 7},  {211227900, 7},  {174592167, 7},
+    {138547333, 7},  {103079216, 7},  {68174085, 7},   {33818641, 7},
+    {0, 7},          {4228378656, 8}, {4162814457, 8}, {4098251237, 8},
+    {4034666248, 8}, {3972037425, 8}, {3910343360, 8}, {3849563281, 8},
+    {3789677026, 8}, {3730665024, 8}, {3672508268, 8}, {3615188300, 8},
+    {3558687189, 8}, {3502987511, 8}, {3448072337, 8}, {3393925206, 8},
+    {3340530120, 8}, {3287871517, 8}, {3235934265, 8}, {3184703642, 8},
+    {3134165325, 8}, {3084305374, 8}, {3035110223, 8}, {2986566663, 8},
+    {2938661835, 8}, {2891383213, 8}, {2844718599, 8}, {2798656110, 8},
+    {2753184165, 8}, {2708291480, 8}, {2663967058, 8}, {2620200175, 8},
+    {2576980378, 8}, {2534297473, 8}, {2492141518, 8}, {2450502814, 8},
+    {2409371898, 8}, {2368739540, 8}, {2328596727, 8}, {2288934667, 8},
+    {2249744775, 8}, {2211018668, 8}, {2172748162, 8}, {2134925265, 8},
+    {2097542168, 8}, {2060591247, 8}, {2024065048, 8}, {1987956292, 8},
+    {1952257862, 8}, {1916962805, 8}, {1882064321, 8}, {1847555765, 8},
+    {1813430637, 8}, {1779682582, 8}, {1746305385, 8}, {1713292966, 8},
+    {1680639377, 8}, {1648338801, 8}, {1616385542, 8}, {1584774030, 8},
+    {1553498810, 8}, {1522554545, 8}, {1491936009, 8}, {1461638086, 8},
+    {1431655766, 8}, {1401984144, 8}, {1372618415, 8}, {1343553873, 8},
+    {1314785907, 8}, {1286310003, 8}, {1258121734, 8}, {1230216764, 8},
+    {1202590843, 8}, {1175239808, 8}, {1148159575, 8}, {1121346142, 8},
+    {1094795586, 8}, {1068504060, 8}, {1042467791, 8}, {1016683080, 8},
+    {991146300, 8},  {965853890, 8},  {940802361, 8},  {915988286, 8},
+    {891408307, 8},  {867059126, 8},  {842937507, 8},  {819040276, 8},
+    {795364315, 8},  {771906565, 8},  {748664025, 8},  {725633745, 8},
+    {702812831, 8},  {680198441, 8},  {657787785, 8},  {635578121, 8},
+    {613566757, 8},  {591751050, 8},  {570128403, 8},  {548696263, 8},
+    {527452125, 8},  {506393524, 8},  {485518043, 8},  {464823301, 8},
+    {444306962, 8},  {423966729, 8},  {403800345, 8},  {383805589, 8},
+    {363980280, 8},  {344322273, 8},  {324829460, 8},  {305499766, 8},
+    {286331154, 8},  {267321616, 8},  {248469183, 8},  {229771913, 8},
+    {211227900, 8},  {192835267, 8},  {174592167, 8},  {156496785, 8},
+    {138547333, 8},  {120742053, 8},  {103079216, 8},  {85557118, 8},
+    {68174085, 8},   {50928466, 8},   {33818641, 8},   {16843010, 8},
+};
diff --git a/vp10/common/divide.h b/vp10/common/divide.h
new file mode 100644
index 000000000..2f3c35cf2
--- /dev/null
+++ b/vp10/common/divide.h
@@ -0,0 +1,40 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_DIVIDE_H_
+#define VP10_COMMON_DIVIDE_H_
+// An implementation of the divide by multiply algorithm
+// https://gmplib.org/~tege/divcnst-pldi94.pdf
+
+#include <limits.h>
+
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+struct fastdiv_elem {  // per-divisor constants consumed by fastdiv()
+  unsigned mult;  // multiplier; fastdiv() keeps the high 32 bits of x * mult
+  unsigned shift;  // final right shift applied by fastdiv()
+};
+
+extern const struct fastdiv_elem vp10_fastdiv_tab[256];
+
+static INLINE unsigned fastdiv(unsigned x, int y) {  // y must be in [0, 255]
+  unsigned t =
+      ((uint64_t)x * vp10_fastdiv_tab[y].mult) >> (sizeof(x) * CHAR_BIT);
+  return (unsigned)(((uint64_t)t + x) >> vp10_fastdiv_tab[y].shift);  // no wrap
+}
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+#endif  // VP10_COMMON_DIVIDE_H_
diff --git a/vp10/vp10_common.mk b/vp10/vp10_common.mk
index f8c211279..bc3d84aa1 100644
--- a/vp10/vp10_common.mk
+++ b/vp10/vp10_common.mk
@@ -74,6 +74,9 @@ VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm2d_cfg.h
 VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d.h
 VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d.c
 VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d_cfg.h
+VP10_COMMON_SRCS-$(CONFIG_ANS) += common/ans.h
+VP10_COMMON_SRCS-$(CONFIG_ANS) += common/divide.h
+VP10_COMMON_SRCS-$(CONFIG_ANS) += common/divide.c
 
 VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.h
 VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.c