From: Alex Converse <aconverse@google.com>
Date: Wed, 28 Sep 2016 18:33:20 +0000 (-0700)
Subject: Port ANS from aom/master 25aaf40
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7fe2ae8e887560539453ac511b8dd9d1578675e3;p=libvpx

Port ANS from aom/master 25aaf40

Reconciles the following commits from aom/master to nextgenv2:
- 25aaf40bbc24beeb52de9af7d7624b7d7c6ce9de
- 87073de5693df70eba1c9b9be2b2732ed3b08fb3

Change-Id: Ideda50a6ec75485cb4fa7437c69f4e58d6a2ca73
---

diff --git a/aom_dsp/ans.c b/aom_dsp/ans.c
new file mode 100644
index 000000000..18f6d480f
--- /dev/null
+++ b/aom_dsp/ans.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <assert.h>
+#include "./aom_config.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/ans.h"
+#include "aom_dsp/prob.h"
+
+void aom_rans_build_cdf_from_pdf(const AnsP10 token_probs[], rans_lut cdf_tab) {
+  int i;  // NOTE(review): never checked against the 16-entry rans_lut size
+  cdf_tab[0] = 0;  // explicit leading zero; entry i = sum of token_probs[0..i-1]
+  for (i = 1; cdf_tab[i - 1] < RANS_PRECISION; ++i) {  // stop at full total
+    cdf_tab[i] = cdf_tab[i - 1] + token_probs[i - 1];  // running prefix sum
+  }
+  assert(cdf_tab[i - 1] == RANS_PRECISION);  // pdf must sum to exactly this
+}
+
+static int find_largest(const AnsP10 *const pdf_tab, int num_syms) {
+  int largest_idx = -1;  // returned as-is when num_syms <= 0
+  int largest_p = -1;    // less than any AnsP10 (uint16_t) value
+  int i;
+  for (i = 0; i < num_syms; ++i) {
+    int p = pdf_tab[i];
+    if (p > largest_p) {  // strict compare: ties keep the lowest index
+      largest_p = p;
+      largest_idx = i;
+    }
+  }
+  return largest_idx;  // index of the maximum entry of pdf_tab
+}
+
+void aom_rans_merge_prob8_pdf(AnsP10 *const out_pdf, const AnsP8 node_prob,
+                              const AnsP10 *const src_pdf, int in_syms) {
+  int i;
+  int adjustment = RANS_PRECISION;  // mass still to be handed out
+  const int round_fact = ANS_P8_PRECISION >> 1;  // half a unit, for rounding
+  const AnsP8 p1 = ANS_P8_PRECISION - node_prob;  // 256 - node_prob, in 8 bits
+  const int out_syms = in_syms + 1;  // out_pdf gains one leading entry
+  assert(src_pdf != out_pdf);
+
+  out_pdf[0] = node_prob << (10 - 8);  // widen the 8-bit prob to 10 bits
+  adjustment -= out_pdf[0];
+  for (i = 0; i < in_syms; ++i) {
+    int p = (p1 * src_pdf[i] + round_fact) >> ANS_P8_SHIFT;  // scale by p1/256
+    p = AOMMIN(p, (int)RANS_PRECISION - in_syms);  // upper clamp
+    p = AOMMAX(p, 1);  // never emit a zero probability
+    out_pdf[i + 1] = p;
+    adjustment -= p;
+  }
+
+  // Adjust probabilities so they sum to the total probability
+  if (adjustment > 0) {
+    i = find_largest(out_pdf, out_syms);
+    out_pdf[i] += adjustment;  // whole shortfall goes to the largest entry
+  } else {
+    while (adjustment < 0) {  // excess: shave 1 off the largest per pass
+      i = find_largest(out_pdf, out_syms);
+      --out_pdf[i];
+      assert(out_pdf[i] > 0);
+      adjustment++;
+    }
+  }
+}
diff --git a/aom_dsp/ans.h b/aom_dsp/ans.h
index c526e275d..15fe729ee 100644
--- a/aom_dsp/ans.h
+++ b/aom_dsp/ans.h
@@ -1,413 +1,51 @@
 /*
- *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
  *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
 
 #ifndef AOM_DSP_ANS_H_
 #define AOM_DSP_ANS_H_
-// An implementation of Asymmetric Numeral Systems
+// Constants, types and utilities for Asymmetric Numeral Systems
 // http://arxiv.org/abs/1311.2540v2
 
 #include <assert.h>
 #include "./aom_config.h"
 #include "aom/aom_integer.h"
 #include "aom_dsp/prob.h"
-#include "aom_ports/mem_ops.h"
-
-#define ANS_DIVIDE_BY_MULTIPLY 1
-#if ANS_DIVIDE_BY_MULTIPLY
-#include "aom_dsp/divide.h"
-#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
-  do {                                                     \
-    quotient = fastdiv(dividend, divisor);                 \
-    remainder = dividend - quotient * divisor;             \
-  } while (0)
-#define ANS_DIV(dividend, divisor) fastdiv(dividend, divisor)
-#else
-#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
-  do {                                                     \
-    quotient = dividend / divisor;                         \
-    remainder = dividend % divisor;                        \
-  } while (0)
-#define ANS_DIV(dividend, divisor) ((dividend) / (divisor))
-#endif
 
 #ifdef __cplusplus
 extern "C" {
 #endif  // __cplusplus
 
-struct AnsCoder {
-  uint8_t *buf;
-  int buf_offset;
-  uint32_t state;
-};
-
-struct AnsDecoder {
-  const uint8_t *buf;
-  int buf_offset;
-  uint32_t state;
-};
-
 typedef uint8_t AnsP8;
-#define ans_p8_precision 256u
-#define ans_p8_shift 8
+#define ANS_P8_PRECISION 256u
+#define ANS_P8_SHIFT 8
 typedef uint16_t AnsP10;
-#define ans_p10_precision 1024u
-
-#define rans_precision ans_p10_precision
-
-#define l_base (ans_p10_precision * 4)  // l_base % precision must be 0
-#define io_base 256
-// Range I = { l_base, l_base + 1, ..., l_base * io_base - 1 }
-
-static INLINE void ans_write_init(struct AnsCoder *const ans,
-                                  uint8_t *const buf) {
-  ans->buf = buf;
-  ans->buf_offset = 0;
-  ans->state = l_base;
-}
-
-static INLINE int ans_write_end(struct AnsCoder *const ans) {
-  uint32_t state;
-  assert(ans->state >= l_base);
-  assert(ans->state < l_base * io_base);
-  state = ans->state - l_base;
-  if (state < (1 << 6)) {
-    ans->buf[ans->buf_offset] = (0x00 << 6) + state;
-    return ans->buf_offset + 1;
-  } else if (state < (1 << 14)) {
-    mem_put_le16(ans->buf + ans->buf_offset, (0x01 << 14) + state);
-    return ans->buf_offset + 2;
-  } else if (state < (1 << 22)) {
-    mem_put_le24(ans->buf + ans->buf_offset, (0x02 << 22) + state);
-    return ans->buf_offset + 3;
-  } else {
-    assert(0 && "State is too large to be serialized");
-    return ans->buf_offset;
-  }
-}
-
-// rABS with descending spread
-// p or p0 takes the place of l_s from the paper
-// ans_p8_precision is m
-static INLINE void rabs_desc_write(struct AnsCoder *ans, int val, AnsP8 p0) {
-  const AnsP8 p = ans_p8_precision - p0;
-  const unsigned l_s = val ? p : p0;
-  unsigned quot, rem;
-  if (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
-    ans->buf[ans->buf_offset++] = ans->state % io_base;
-    ans->state /= io_base;
-  }
-  ANS_DIVREM(quot, rem, ans->state, l_s);
-  ans->state = quot * ans_p8_precision + rem + (val ? 0 : p);
-}
-
-#define ANS_IMPL1 0
-#define UNPREDICTABLE(x) x
-static INLINE int rabs_desc_read(struct AnsDecoder *ans, AnsP8 p0) {
-  int val;
-#if ANS_IMPL1
-  unsigned l_s;
-#else
-  unsigned quot, rem, x, xn;
-#endif
-  const AnsP8 p = ans_p8_precision - p0;
-  if (ans->state < l_base) {
-    ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
-  }
-#if ANS_IMPL1
-  val = ans->state % ans_p8_precision < p;
-  l_s = val ? p : p0;
-  ans->state = (ans->state / ans_p8_precision) * l_s +
-               ans->state % ans_p8_precision - (!val * p);
-#else
-  x = ans->state;
-  quot = x / ans_p8_precision;
-  rem = x % ans_p8_precision;
-  xn = quot * p;
-  val = rem < p;
-  if (UNPREDICTABLE(val)) {
-    ans->state = xn + rem;
-  } else {
-    // ans->state = quot * p0 + rem - p;
-    ans->state = x - xn - p;
-  }
-#endif
-  return val;
-}
-
-// rABS with ascending spread
-// p or p0 takes the place of l_s from the paper
-// ans_p8_precision is m
-static INLINE void rabs_asc_write(struct AnsCoder *ans, int val, AnsP8 p0) {
-  const AnsP8 p = ans_p8_precision - p0;
-  const unsigned l_s = val ? p : p0;
-  unsigned quot, rem;
-  if (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
-    ans->buf[ans->buf_offset++] = ans->state % io_base;
-    ans->state /= io_base;
-  }
-  ANS_DIVREM(quot, rem, ans->state, l_s);
-  ans->state = quot * ans_p8_precision + rem + (val ? p0 : 0);
-}
-
-static INLINE int rabs_asc_read(struct AnsDecoder *ans, AnsP8 p0) {
-  int val;
-#if ANS_IMPL1
-  unsigned l_s;
-#else
-  unsigned quot, rem, x, xn;
-#endif
-  const AnsP8 p = ans_p8_precision - p0;
-  if (ans->state < l_base) {
-    ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
-  }
-#if ANS_IMPL1
-  val = ans->state % ans_p8_precision < p;
-  l_s = val ? p : p0;
-  ans->state = (ans->state / ans_p8_precision) * l_s +
-               ans->state % ans_p8_precision - (!val * p);
-#else
-  x = ans->state;
-  quot = x / ans_p8_precision;
-  rem = x % ans_p8_precision;
-  xn = quot * p;
-  val = rem >= p0;
-  if (UNPREDICTABLE(val)) {
-    ans->state = xn + rem - p0;
-  } else {
-    // ans->state = quot * p0 + rem - p0;
-    ans->state = x - xn;
-  }
-#endif
-  return val;
-}
-
-#define rabs_read rabs_desc_read
-#define rabs_write rabs_desc_write
-
-// uABS with normalization
-static INLINE void uabs_write(struct AnsCoder *ans, int val, AnsP8 p0) {
-  AnsP8 p = ans_p8_precision - p0;
-  const unsigned l_s = val ? p : p0;
-  while (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
-    ans->buf[ans->buf_offset++] = ans->state % io_base;
-    ans->state /= io_base;
-  }
-  if (!val)
-    ans->state = ANS_DIV(ans->state * ans_p8_precision, p0);
-  else
-    ans->state = ANS_DIV((ans->state + 1) * ans_p8_precision + p - 1, p) - 1;
-}
-
-static INLINE int uabs_read(struct AnsDecoder *ans, AnsP8 p0) {
-  AnsP8 p = ans_p8_precision - p0;
-  int s;
-  // unsigned int xp1;
-  unsigned xp, sp;
-  unsigned state = ans->state;
-  while (state < l_base && ans->buf_offset > 0) {
-    state = state * io_base + ans->buf[--ans->buf_offset];
-  }
-  sp = state * p;
-  // xp1 = (sp + p) / ans_p8_precision;
-  xp = sp / ans_p8_precision;
-  // s = xp1 - xp;
-  s = (sp & 0xFF) >= p0;
-  if (UNPREDICTABLE(s))
-    ans->state = xp;
-  else
-    ans->state = state - xp;
-  return s;
-}
-
-static INLINE int uabs_read_bit(struct AnsDecoder *ans) {
-  int s;
-  unsigned state = ans->state;
-  while (state < l_base && ans->buf_offset > 0) {
-    state = state * io_base + ans->buf[--ans->buf_offset];
-  }
-  s = (int)(state & 1);
-  ans->state = state >> 1;
-  return s;
-}
+#define ANS_P10_PRECISION 1024u
+#define RANS_PROB_BITS 10
 
-static INLINE int uabs_read_literal(struct AnsDecoder *ans, int bits) {
-  int literal = 0, bit;
-  assert(bits < 31);
+#define RANS_PRECISION ANS_P10_PRECISION
 
-  // TODO(aconverse): Investigate ways to read/write literals faster,
-  // e.g. 8-bit chunks.
-  for (bit = bits - 1; bit >= 0; bit--) literal |= uabs_read_bit(ans) << bit;
-
-  return literal;
-}
-
-// TODO(aconverse): Replace trees with tokensets.
-static INLINE int uabs_read_tree(struct AnsDecoder *ans,
-                                 const aom_tree_index *tree,
-                                 const AnsP8 *probs) {
-  aom_tree_index i = 0;
-
-  while ((i = tree[i + uabs_read(ans, probs[i >> 1])]) > 0) continue;
-
-  return -i;
-}
-
-struct rans_sym {
-  AnsP10 prob;
-  AnsP10 cum_prob;  // not-inclusive
-};
-
-struct rans_dec_sym {
-  uint8_t val;
-  AnsP10 prob;
-  AnsP10 cum_prob;  // not-inclusive
-};
+#define L_BASE (ANS_P10_PRECISION * 4)  // L_BASE % precision must be 0
+#define IO_BASE 256
+// Range I = { L_BASE, L_BASE + 1, ..., L_BASE * IO_BASE - 1 }
 
 // This is now just a boring cdf. It starts with an explicit zero.
 // TODO(aconverse): Remove starting zero.
-typedef uint16_t rans_dec_lut[16];
-
-static INLINE void rans_build_cdf_from_pdf(const AnsP10 token_probs[],
-                                           rans_dec_lut cdf_tab) {
-  int i;
-  cdf_tab[0] = 0;
-  for (i = 1; cdf_tab[i - 1] < rans_precision; ++i) {
-    cdf_tab[i] = cdf_tab[i - 1] + token_probs[i - 1];
-  }
-  assert(cdf_tab[i - 1] == rans_precision);
-}
-
-static INLINE int ans_find_largest(const AnsP10 *const pdf_tab, int num_syms) {
-  int largest_idx = -1;
-  int largest_p = -1;
-  int i;
-  for (i = 0; i < num_syms; ++i) {
-    int p = pdf_tab[i];
-    if (p > largest_p) {
-      largest_p = p;
-      largest_idx = i;
-    }
-  }
-  return largest_idx;
-}
-
-static INLINE void rans_merge_prob8_pdf(AnsP10 *const out_pdf,
-                                        const AnsP8 node_prob,
-                                        const AnsP10 *const src_pdf,
-                                        int in_syms) {
-  int i;
-  int adjustment = rans_precision;
-  const int round_fact = ans_p8_precision >> 1;
-  const AnsP8 p1 = ans_p8_precision - node_prob;
-  const int out_syms = in_syms + 1;
-  assert(src_pdf != out_pdf);
-
-  out_pdf[0] = node_prob << (10 - 8);
-  adjustment -= out_pdf[0];
-  for (i = 0; i < in_syms; ++i) {
-    int p = (p1 * src_pdf[i] + round_fact) >> ans_p8_shift;
-    p = AOMMIN(p, (int)rans_precision - in_syms);
-    p = AOMMAX(p, 1);
-    out_pdf[i + 1] = p;
-    adjustment -= p;
-  }
-
-  // Adjust probabilities so they sum to the total probability
-  if (adjustment > 0) {
-    i = ans_find_largest(out_pdf, out_syms);
-    out_pdf[i] += adjustment;
-  } else {
-    while (adjustment < 0) {
-      i = ans_find_largest(out_pdf, out_syms);
-      --out_pdf[i];
-      assert(out_pdf[i] > 0);
-      adjustment++;
-    }
-  }
-}
-
-// rANS with normalization
-// sym->prob takes the place of l_s from the paper
-// ans_p10_precision is m
-static INLINE void rans_write(struct AnsCoder *ans,
-                              const struct rans_sym *const sym) {
-  const AnsP10 p = sym->prob;
-  while (ans->state >= l_base / rans_precision * io_base * p) {
-    ans->buf[ans->buf_offset++] = ans->state % io_base;
-    ans->state /= io_base;
-  }
-  ans->state =
-      (ans->state / p) * rans_precision + ans->state % p + sym->cum_prob;
-}
-
-static INLINE void fetch_sym(struct rans_dec_sym *out, const rans_dec_lut cdf,
-                             AnsP10 rem) {
-  int i = 0;
-  // TODO(skal): if critical, could be a binary search.
-  // Or, better, an O(1) alias-table.
-  while (rem >= cdf[i]) {
-    ++i;
-  }
-  out->val = i - 1;
-  out->prob = (AnsP10)(cdf[i] - cdf[i - 1]);
-  out->cum_prob = (AnsP10)cdf[i - 1];
-}
-
-static INLINE int rans_read(struct AnsDecoder *ans, const rans_dec_lut tab) {
-  unsigned rem;
-  unsigned quo;
-  struct rans_dec_sym sym;
-  while (ans->state < l_base && ans->buf_offset > 0) {
-    ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
-  }
-  quo = ans->state / rans_precision;
-  rem = ans->state % rans_precision;
-  fetch_sym(&sym, tab, rem);
-  ans->state = quo * sym.prob + rem - sym.cum_prob;
-  return sym.val;
-}
-
-static INLINE int ans_read_init(struct AnsDecoder *const ans,
-                                const uint8_t *const buf, int offset) {
-  unsigned x;
-  if (offset < 1) return 1;
-  ans->buf = buf;
-  x = buf[offset - 1] >> 6;
-  if (x == 0) {
-    ans->buf_offset = offset - 1;
-    ans->state = buf[offset - 1] & 0x3F;
-  } else if (x == 1) {
-    if (offset < 2) return 1;
-    ans->buf_offset = offset - 2;
-    ans->state = mem_get_le16(buf + offset - 2) & 0x3FFF;
-  } else if (x == 2) {
-    if (offset < 3) return 1;
-    ans->buf_offset = offset - 3;
-    ans->state = mem_get_le24(buf + offset - 3) & 0x3FFFFF;
-  } else {
-    // x == 3 implies this byte is a superframe marker
-    return 1;
-  }
-  ans->state += l_base;
-  if (ans->state >= l_base * io_base) return 1;
-  return 0;
-}
+typedef uint16_t rans_lut[16];
+// TODO(aconverse): Update callers and remove this shim
+#define rans_dec_lut rans_lut
 
-static INLINE int ans_read_end(struct AnsDecoder *const ans) {
-  return ans->state == l_base;
-}
+void aom_rans_build_cdf_from_pdf(const AnsP10 token_probs[], rans_lut cdf_tab);
 
-static INLINE int ans_reader_has_error(const struct AnsDecoder *const ans) {
-  return ans->state < l_base && ans->buf_offset == 0;
-}
-#undef ANS_DIVREM
+void aom_rans_merge_prob8_pdf(AnsP10 *const out_pdf, const AnsP8 node_prob,
+                              const AnsP10 *const src_pdf, int in_syms);
 #ifdef __cplusplus
 }  // extern "C"
 #endif  // __cplusplus
diff --git a/aom_dsp/ansreader.h b/aom_dsp/ansreader.h
new file mode 100644
index 000000000..11619b030
--- /dev/null
+++ b/aom_dsp/ansreader.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_DSP_ANSREADER_H_
+#define AOM_DSP_ANSREADER_H_
+// A uABS and rANS decoder implementation of Asymmetric Numeral Systems
+// http://arxiv.org/abs/1311.2540v2
+
+#include <assert.h>
+#include "./aom_config.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/prob.h"
+#include "aom_dsp/ans.h"
+#include "aom_ports/mem_ops.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+struct AnsDecoder {
+  const uint8_t *buf;  // input bytes; readers consume buf[--buf_offset]
+  int buf_offset;      // number of bytes not yet consumed
+  uint32_t state;      // current ANS state
+};
+
+static INLINE int uabs_read(struct AnsDecoder *ans, AnsP8 p0) {
+  AnsP8 p = ANS_P8_PRECISION - p0;  // 256 - p0, in 8 bits
+  int s;
+  unsigned xp, sp;
+  unsigned state = ans->state;
+  while (state < L_BASE && ans->buf_offset > 0) {  // refill from buffer tail
+    state = state * IO_BASE + ans->buf[--ans->buf_offset];
+  }
+  sp = state * p;
+  xp = sp / ANS_P8_PRECISION;
+  s = (sp & 0xFF) >= p0;  // low 8 bits of sp, i.e. sp % 256
+  if (s)
+    ans->state = xp;
+  else
+    ans->state = state - xp;
+  return s;  // the decoded bit (0 or 1)
+}
+
+static INLINE int uabs_read_bit(struct AnsDecoder *ans) {
+  int s;
+  unsigned state = ans->state;
+  while (state < L_BASE && ans->buf_offset > 0) {  // refill from buffer tail
+    state = state * IO_BASE + ans->buf[--ans->buf_offset];
+  }
+  s = (int)(state & 1);     // the bit is the low bit of the state
+  ans->state = state >> 1;  // and is then shifted out
+  return s;
+}
+
+struct rans_dec_sym {
+  uint8_t val;      // symbol index
+  AnsP10 prob;      // width of the symbol's cdf interval
+  AnsP10 cum_prob;  // cumulative probability, not including this symbol
+};
+
+static INLINE void fetch_sym(struct rans_dec_sym *out, const rans_lut cdf,
+                             AnsP10 rem) {
+  int i = 0;  // assumes cdf[0] == 0, so the scan below ends with i >= 1
+  // TODO(skal): if critical, could be a binary search.
+  // Or, better, an O(1) alias-table.
+  while (rem >= cdf[i]) {  // linear scan for the first entry above rem
+    ++i;
+  }
+  out->val = i - 1;  // rem lies in [cdf[i - 1], cdf[i])
+  out->prob = (AnsP10)(cdf[i] - cdf[i - 1]);
+  out->cum_prob = (AnsP10)cdf[i - 1];
+}
+
+static INLINE int rans_read(struct AnsDecoder *ans, const rans_lut tab) {
+  unsigned rem;
+  unsigned quo;
+  struct rans_dec_sym sym;
+  while (ans->state < L_BASE && ans->buf_offset > 0) {  // refill from tail
+    ans->state = ans->state * IO_BASE + ans->buf[--ans->buf_offset];
+  }
+  quo = ans->state / RANS_PRECISION;
+  rem = ans->state % RANS_PRECISION;  // slot used to look up the symbol
+  fetch_sym(&sym, tab, rem);
+  ans->state = quo * sym.prob + rem - sym.cum_prob;
+  return sym.val;  // index of the decoded symbol
+}
+
+static INLINE int ans_read_init(struct AnsDecoder *const ans,
+                                const uint8_t *const buf, int offset) {
+  unsigned x;
+  if (offset < 1) return 1;  // 1 is returned for every rejected input
+  ans->buf = buf;
+  x = buf[offset - 1] >> 6;  // top 2 bits of the last byte select the layout
+  if (x == 0) {              // 6-bit value held in 1 byte
+    ans->buf_offset = offset - 1;
+    ans->state = buf[offset - 1] & 0x3F;
+  } else if (x == 1) {  // 14-bit value held in 2 bytes
+    if (offset < 2) return 1;
+    ans->buf_offset = offset - 2;
+    ans->state = mem_get_le16(buf + offset - 2) & 0x3FFF;
+  } else if (x == 2) {  // 22-bit value held in 3 bytes
+    if (offset < 3) return 1;
+    ans->buf_offset = offset - 3;
+    ans->state = mem_get_le24(buf + offset - 3) & 0x3FFFFF;
+  } else {
+    // x == 3 implies this byte is a superframe marker
+    return 1;
+  }
+  ans->state += L_BASE;  // the stored value is the state minus L_BASE
+  if (ans->state >= L_BASE * IO_BASE) return 1;  // above the valid range
+  return 0;
+}
+
+static INLINE int ans_read_end(struct AnsDecoder *const ans) {
+  return ans->state == L_BASE;  // nonzero iff the state equals L_BASE
+}
+
+static INLINE int ans_reader_has_error(const struct AnsDecoder *const ans) {
+  return ans->state < L_BASE && ans->buf_offset == 0;  // starved of input
+}
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+#endif  // AOM_DSP_ANSREADER_H_
diff --git a/aom_dsp/answriter.h b/aom_dsp/answriter.h
new file mode 100644
index 000000000..5a82d35a9
--- /dev/null
+++ b/aom_dsp/answriter.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_DSP_ANSWRITER_H_
+#define AOM_DSP_ANSWRITER_H_
+// A uABS and rANS encoder implementation of Asymmetric Numeral Systems
+// http://arxiv.org/abs/1311.2540v2
+
+#include <assert.h>
+#include "./aom_config.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/ans.h"
+#include "aom_dsp/prob.h"
+#include "aom_ports/mem_ops.h"
+
+#define ANS_DIV(dividend, divisor) ((dividend) / (divisor))
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+struct AnsCoder {
+  uint8_t *buf;    // output bytes
+  int buf_offset;  // index of the next byte to write
+  uint32_t state;  // current ANS state
+};
+
+static INLINE void ans_write_init(struct AnsCoder *const ans,
+                                  uint8_t *const buf) {
+  ans->buf = buf;
+  ans->buf_offset = 0;  // nothing written yet
+  ans->state = L_BASE;  // smallest state accepted by ans_write_end
+}
+
+static INLINE int ans_write_end(struct AnsCoder *const ans) {
+  uint32_t state;
+  assert(ans->state >= L_BASE);
+  assert(ans->state < L_BASE * IO_BASE);
+  state = ans->state - L_BASE;  // only the offset from L_BASE is stored
+  if (state < (1 << 6)) {       // tag 0 in the top 2 bits: 1 byte
+    ans->buf[ans->buf_offset] = (0x00 << 6) + state;
+    return ans->buf_offset + 1;    // offset just past the final byte
+  } else if (state < (1 << 14)) {  // tag 1 in the top 2 bits: 2 bytes
+    mem_put_le16(ans->buf + ans->buf_offset, (0x01 << 14) + state);
+    return ans->buf_offset + 2;
+  } else if (state < (1 << 22)) {  // tag 2 in the top 2 bits: 3 bytes
+    mem_put_le24(ans->buf + ans->buf_offset, (0x02 << 22) + state);
+    return ans->buf_offset + 3;
+  } else {
+    assert(0 && "State is too large to be serialized");
+    return ans->buf_offset;  // nothing appended on this path
+  }
+}
+
+// uABS with normalization: codes the binary value val into ans->state
+static INLINE void uabs_write(struct AnsCoder *ans, int val, AnsP8 p0) {
+  AnsP8 p = ANS_P8_PRECISION - p0;    // 256 - p0, in 8 bits
+  const unsigned l_s = val ? p : p0;  // scale for val (p0 when val == 0)
+  while (ans->state >= L_BASE / ANS_P8_PRECISION * IO_BASE * l_s) {
+    ans->buf[ans->buf_offset++] = ans->state % IO_BASE;  // flush the low byte
+    ans->state /= IO_BASE;
+  }
+  if (!val)
+    ans->state = ANS_DIV(ans->state * ANS_P8_PRECISION, p0);
+  else
+    ans->state = ANS_DIV((ans->state + 1) * ANS_P8_PRECISION + p - 1, p) - 1;
+}
+
+struct rans_sym {
+  AnsP10 prob;      // used by rans_write as a divisor, so must be nonzero
+  AnsP10 cum_prob;  // cumulative probability, not including this symbol
+};
+
+// rANS with normalization: codes one symbol into ans->state
+// sym->prob takes the place of l_s from the paper
+// ANS_P10_PRECISION is m
+static INLINE void rans_write(struct AnsCoder *ans,
+                              const struct rans_sym *const sym) {
+  const AnsP10 p = sym->prob;  // must be nonzero: divisor and modulus below
+  while (ans->state >= L_BASE / RANS_PRECISION * IO_BASE * p) {
+    ans->buf[ans->buf_offset++] = ans->state % IO_BASE;  // flush the low byte
+    ans->state /= IO_BASE;
+  }
+  ans->state =
+      (ans->state / p) * RANS_PRECISION + ans->state % p + sym->cum_prob;
+}
+
+#undef ANS_DIV
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+#endif  // AOM_DSP_ANSWRITER_H_
diff --git a/aom_dsp/aom_dsp.mk b/aom_dsp/aom_dsp.mk
index 25e7d8f96..7d14384f9 100644
--- a/aom_dsp/aom_dsp.mk
+++ b/aom_dsp/aom_dsp.mk
@@ -20,19 +20,19 @@ DSP_SRCS-$(ARCH_X86)$(ARCH_X86_64)   += x86/synonyms.h
 DSP_SRCS-yes += prob.h
 DSP_SRCS-yes += prob.c
 DSP_SRCS-$(CONFIG_ANS) += ans.h
+DSP_SRCS-$(CONFIG_ANS) += ans.c
 
 ifeq ($(CONFIG_ENCODERS),yes)
+DSP_SRCS-$(CONFIG_ANS) += answriter.h
 DSP_SRCS-yes += bitwriter.h
 DSP_SRCS-yes += dkboolwriter.h
 DSP_SRCS-yes += dkboolwriter.c
 DSP_SRCS-yes += bitwriter_buffer.c
 DSP_SRCS-yes += bitwriter_buffer.h
-DSP_SRCS-$(CONFIG_ANS) += buf_ans.h
-DSP_SRCS-$(CONFIG_ANS) += buf_ans.c
-DSP_SRCS-$(CONFIG_ANS) += divide.h
-DSP_SRCS-$(CONFIG_ANS) += divide.c
 DSP_SRCS-yes += psnr.c
 DSP_SRCS-yes += psnr.h
+DSP_SRCS-$(CONFIG_ANS) += buf_ans.h
+DSP_SRCS-$(CONFIG_ANS) += buf_ans.c
 DSP_SRCS-$(CONFIG_INTERNAL_STATS) += ssim.c
 DSP_SRCS-$(CONFIG_INTERNAL_STATS) += ssim.h
 DSP_SRCS-$(CONFIG_INTERNAL_STATS) += psnrhvs.c
@@ -40,6 +40,7 @@ DSP_SRCS-$(CONFIG_INTERNAL_STATS) += fastssim.c
 endif
 
 ifeq ($(CONFIG_DECODERS),yes)
+DSP_SRCS-$(CONFIG_ANS) += ansreader.h
 DSP_SRCS-yes += bitreader.h
 DSP_SRCS-yes += dkboolreader.h
 DSP_SRCS-yes += dkboolreader.c
diff --git a/aom_dsp/bitreader.h b/aom_dsp/bitreader.h
index 86c517339..611949a42 100644
--- a/aom_dsp/bitreader.h
+++ b/aom_dsp/bitreader.h
@@ -13,12 +13,13 @@
 #define AOM_DSP_BITREADER_H_
 
 #include <assert.h>
+#include <limits.h>
 
 #include "./aom_config.h"
 #include "aom/aomdx.h"
 #include "aom/aom_integer.h"
 #if CONFIG_ANS
-#include "aom_dsp/ans.h"
+#include "aom_dsp/ansreader.h"
 #else
 #include "aom_dsp/dkboolreader.h"
 #endif
diff --git a/aom_dsp/buf_ans.c b/aom_dsp/buf_ans.c
index a62aaba6c..1386722b3 100644
--- a/aom_dsp/buf_ans.c
+++ b/aom_dsp/buf_ans.c
@@ -1,11 +1,12 @@
 /*
- *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
  *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
 
 #include <string.h>
diff --git a/aom_dsp/buf_ans.h b/aom_dsp/buf_ans.h
index b3fdad9de..58d6e61fc 100644
--- a/aom_dsp/buf_ans.h
+++ b/aom_dsp/buf_ans.h
@@ -1,23 +1,25 @@
 /*
- *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
  *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
 
 #ifndef AOM_DSP_BUF_ANS_H_
 #define AOM_DSP_BUF_ANS_H_
 // Buffered forward ANS writer.
-// Symbols are written to the writer in forward (decode) order and serialzed
+// Symbols are written to the writer in forward (decode) order and serialized
 // backwards due to ANS's stack like behavior.
 
 #include <assert.h>
 #include "./aom_config.h"
 #include "aom/aom_integer.h"
 #include "aom_dsp/ans.h"
+#include "aom_dsp/answriter.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -26,11 +28,14 @@ extern "C" {
 #define ANS_METHOD_UABS 0
 #define ANS_METHOD_RANS 1
 
+struct aom_internal_error_info;  // forward declaration only; defines no object
+
 struct buffered_ans_symbol {
-  uint8_t method;  // one of ANS_METHOD_UABS or ANS_METHOD_RANS
-  // TODO(aconverse): Should be possible to write this interms of start for ABS
-  AnsP10 val_start;  // Boolean value for ABS, start in symbol cycle for Rans
-  AnsP10 prob;       // Probability of this symbol
+  unsigned int method : 1;  // one of ANS_METHOD_UABS or ANS_METHOD_RANS
+  // TODO(aconverse): Should be possible to write this in terms of start for ABS
+  unsigned int val_start : RANS_PROB_BITS;  // Boolean value for ABS
+                                            // start in symbol cycle for Rans
+  unsigned int prob : RANS_PROB_BITS;       // Probability of this symbol
 };
 
 struct BufAnsCoder {
diff --git a/aom_dsp/divide.c b/aom_dsp/divide.c
deleted file mode 100644
index 3e58da5e9..000000000
--- a/aom_dsp/divide.c
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "aom_dsp/divide.h"
-
-/* Constants for divide by multiply for small divisors generated with:
-void init_fastdiv() {
-  int i;
-  for (i = 3; i < 256; ++i) {
-    const int s = 31 ^ __builtin_clz(2 * i + 1);
-    const unsigned long long base = (1ull << (sizeof(unsigned) * 8 + s)) - 1;
-    fastdiv_tab[i].mult = (base / i + 1) & 0xFFFFFFFF;
-    fastdiv_tab[i].shift = s;
-  }
-  for (i = 0; i < 8; ++i) {
-    fastdiv_tab[1 << i].mult = 0;
-    fastdiv_tab[1 << i].shift = i;
-  }
-}
-*/
-const struct fastdiv_elem av1_fastdiv_tab[256] = {
-  { 0, 0 },           { 0, 0 },           { 0, 1 },
-  { 1431655766, 2 },  { 0, 2 },           { 2576980378u, 3 },
-  { 1431655766, 3 },  { 613566757, 3 },   { 0, 3 },
-  { 3340530120u, 4 }, { 2576980378u, 4 }, { 1952257862, 4 },
-  { 1431655766, 4 },  { 991146300, 4 },   { 613566757, 4 },
-  { 286331154u, 4 },  { 0, 4 },           { 3789677026u, 5 },
-  { 3340530120u, 5 }, { 2938661835u, 5 }, { 2576980378u, 5 },
-  { 2249744775u, 5 }, { 1952257862, 5 },  { 1680639377, 5 },
-  { 1431655766, 5 },  { 1202590843, 5 },  { 991146300, 5 },
-  { 795364315, 5 },   { 613566757, 5 },   { 444306962, 5 },
-  { 286331154, 5 },   { 138547333, 5 },   { 0, 5 },
-  { 4034666248u, 6 }, { 3789677026u, 6 }, { 3558687189u, 6 },
-  { 3340530120u, 6 }, { 3134165325u, 6 }, { 2938661835u, 6 },
-  { 2753184165u, 6 }, { 2576980378u, 6 }, { 2409371898u, 6 },
-  { 2249744775u, 6 }, { 2097542168u, 6 }, { 1952257862, 6 },
-  { 1813430637, 6 },  { 1680639377, 6 },  { 1553498810, 6 },
-  { 1431655766, 6 },  { 1314785907, 6 },  { 1202590843, 6 },
-  { 1094795586, 6 },  { 991146300, 6 },   { 891408307, 6 },
-  { 795364315, 6 },   { 702812831, 6 },   { 613566757, 6 },
-  { 527452125, 6 },   { 444306962, 6 },   { 363980280, 6 },
-  { 286331154, 6 },   { 211227900, 6 },   { 138547333, 6 },
-  { 68174085, 6 },    { 0, 6 },           { 4162814457u, 7 },
-  { 4034666248u, 7 }, { 3910343360u, 7 }, { 3789677026u, 7 },
-  { 3672508268u, 7 }, { 3558687189u, 7 }, { 3448072337u, 7 },
-  { 3340530120u, 7 }, { 3235934265u, 7 }, { 3134165325u, 7 },
-  { 3035110223u, 7 }, { 2938661835u, 7 }, { 2844718599u, 7 },
-  { 2753184165u, 7 }, { 2663967058u, 7 }, { 2576980378u, 7 },
-  { 2492141518u, 7 }, { 2409371898u, 7 }, { 2328596727u, 7 },
-  { 2249744775u, 7 }, { 2172748162u, 7 }, { 2097542168, 7 },
-  { 2024065048, 7 },  { 1952257862, 7 },  { 1882064321, 7 },
-  { 1813430637, 7 },  { 1746305385, 7 },  { 1680639377, 7 },
-  { 1616385542, 7 },  { 1553498810, 7 },  { 1491936009, 7 },
-  { 1431655766, 7 },  { 1372618415, 7 },  { 1314785907, 7 },
-  { 1258121734, 7 },  { 1202590843, 7 },  { 1148159575, 7 },
-  { 1094795586, 7 },  { 1042467791, 7 },  { 991146300, 7 },
-  { 940802361, 7 },   { 891408307, 7 },   { 842937507, 7 },
-  { 795364315, 7 },   { 748664025, 7 },   { 702812831, 7 },
-  { 657787785, 7 },   { 613566757, 7 },   { 570128403, 7 },
-  { 527452125, 7 },   { 485518043, 7 },   { 444306962, 7 },
-  { 403800345, 7 },   { 363980280, 7 },   { 324829460, 7 },
-  { 286331154, 7 },   { 248469183, 7 },   { 211227900, 7 },
-  { 174592167, 7 },   { 138547333, 7 },   { 103079216, 7 },
-  { 68174085, 7 },    { 33818641, 7 },    { 0, 7 },
-  { 4228378656u, 8 }, { 4162814457u, 8 }, { 4098251237u, 8 },
-  { 4034666248u, 8 }, { 3972037425u, 8 }, { 3910343360u, 8 },
-  { 3849563281u, 8 }, { 3789677026u, 8 }, { 3730665024u, 8 },
-  { 3672508268u, 8 }, { 3615188300u, 8 }, { 3558687189u, 8 },
-  { 3502987511u, 8 }, { 3448072337u, 8 }, { 3393925206u, 8 },
-  { 3340530120u, 8 }, { 3287871517u, 8 }, { 3235934265u, 8 },
-  { 3184703642u, 8 }, { 3134165325u, 8 }, { 3084305374u, 8 },
-  { 3035110223u, 8 }, { 2986566663u, 8 }, { 2938661835u, 8 },
-  { 2891383213u, 8 }, { 2844718599u, 8 }, { 2798656110u, 8 },
-  { 2753184165u, 8 }, { 2708291480u, 8 }, { 2663967058u, 8 },
-  { 2620200175u, 8 }, { 2576980378u, 8 }, { 2534297473u, 8 },
-  { 2492141518u, 8 }, { 2450502814u, 8 }, { 2409371898u, 8 },
-  { 2368739540u, 8 }, { 2328596727u, 8 }, { 2288934667u, 8 },
-  { 2249744775u, 8 }, { 2211018668u, 8 }, { 2172748162u, 8 },
-  { 2134925265u, 8 }, { 2097542168, 8 },  { 2060591247, 8 },
-  { 2024065048, 8 },  { 1987956292, 8 },  { 1952257862, 8 },
-  { 1916962805, 8 },  { 1882064321, 8 },  { 1847555765, 8 },
-  { 1813430637, 8 },  { 1779682582, 8 },  { 1746305385, 8 },
-  { 1713292966, 8 },  { 1680639377, 8 },  { 1648338801, 8 },
-  { 1616385542, 8 },  { 1584774030, 8 },  { 1553498810, 8 },
-  { 1522554545, 8 },  { 1491936009, 8 },  { 1461638086, 8 },
-  { 1431655766, 8 },  { 1401984144, 8 },  { 1372618415, 8 },
-  { 1343553873, 8 },  { 1314785907, 8 },  { 1286310003, 8 },
-  { 1258121734, 8 },  { 1230216764, 8 },  { 1202590843, 8 },
-  { 1175239808, 8 },  { 1148159575, 8 },  { 1121346142, 8 },
-  { 1094795586, 8 },  { 1068504060, 8 },  { 1042467791, 8 },
-  { 1016683080, 8 },  { 991146300, 8 },   { 965853890, 8 },
-  { 940802361, 8 },   { 915988286, 8 },   { 891408307, 8 },
-  { 867059126, 8 },   { 842937507, 8 },   { 819040276, 8 },
-  { 795364315, 8 },   { 771906565, 8 },   { 748664025, 8 },
-  { 725633745, 8 },   { 702812831, 8 },   { 680198441, 8 },
-  { 657787785, 8 },   { 635578121, 8 },   { 613566757, 8 },
-  { 591751050, 8 },   { 570128403, 8 },   { 548696263, 8 },
-  { 527452125, 8 },   { 506393524, 8 },   { 485518043, 8 },
-  { 464823301, 8 },   { 444306962, 8 },   { 423966729, 8 },
-  { 403800345, 8 },   { 383805589, 8 },   { 363980280, 8 },
-  { 344322273, 8 },   { 324829460, 8 },   { 305499766, 8 },
-  { 286331154, 8 },   { 267321616, 8 },   { 248469183, 8 },
-  { 229771913, 8 },   { 211227900, 8 },   { 192835267, 8 },
-  { 174592167, 8 },   { 156496785, 8 },   { 138547333, 8 },
-  { 120742053, 8 },   { 103079216, 8 },   { 85557118, 8 },
-  { 68174085, 8 },    { 50928466, 8 },    { 33818641, 8 },
-  { 16843010, 8 },
-};
diff --git a/aom_dsp/divide.h b/aom_dsp/divide.h
deleted file mode 100644
index c92a58f39..000000000
--- a/aom_dsp/divide.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef AOM_DSP_DIVIDE_H_
-#define AOM_DSP_DIVIDE_H_
-// An implemntation of the divide by multiply alogrithm
-// https://gmplib.org/~tege/divcnst-pldi94.pdf
-
-#include <limits.h>
-
-#include "./aom_config.h"
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif  // __cplusplus
-
-struct fastdiv_elem {
-  unsigned mult;
-  unsigned shift;
-};
-
-extern const struct fastdiv_elem av1_fastdiv_tab[256];
-
-static INLINE unsigned fastdiv(unsigned x, int y) {
-  unsigned t =
-      ((uint64_t)x * av1_fastdiv_tab[y].mult) >> (sizeof(x) * CHAR_BIT);
-  return (t + x) >> av1_fastdiv_tab[y].shift;
-}
-#ifdef __cplusplus
-}  // extern "C"
-#endif  // __cplusplus
-#endif  // AOM_DSP_DIVIDE_H_
diff --git a/av1/common/entropy.c b/av1/common/entropy.c
index 25ac3ed91..c20a70159 100644
--- a/av1/common/entropy.c
+++ b/av1/common/entropy.c
@@ -2806,10 +2806,10 @@ void av1_build_token_cdfs(const aom_prob *pdf_model, rans_dec_lut cdf) {
   assert(pdf_model[2] != 0);
   // TODO(aconverse): Investigate making the precision of the zero and EOB tree
   // nodes 10-bits.
-  rans_merge_prob8_pdf(pdf_tab, pdf_model[1],
-                       av1_pareto8_token_probs[pdf_model[2] - 1],
-                       ENTROPY_TOKENS - 2);
-  rans_build_cdf_from_pdf(pdf_tab, cdf);
+  aom_rans_merge_prob8_pdf(pdf_tab, pdf_model[1],
+                           av1_pareto8_token_probs[pdf_model[2] - 1],
+                           ENTROPY_TOKENS - 2);
+  aom_rans_build_cdf_from_pdf(pdf_tab, cdf);
 }
 
 void av1_coef_pareto_cdfs(FRAME_CONTEXT *fc) {
diff --git a/test/ans_test.cc b/test/ans_test.cc
index 1e07f38d7..ca38de2b1 100644
--- a/test/ans_test.cc
+++ b/test/ans_test.cc
@@ -18,12 +18,9 @@
 
 #include "third_party/googletest/src/include/gtest/gtest.h"
 
-#include "aom_dsp/ans.h"
 #include "test/acm_random.h"
-
-// Local hacks to resolve circular dependencies in these commits
-#define RANS_PRECISION rans_precision
-#define rans_lut rans_dec_lut
+#include "aom_dsp/ansreader.h"
+#include "aom_dsp/answriter.h"
 
 namespace {
 typedef std::vector<std::pair<uint8_t, bool> > PvVec;