From: Nathan E. Egge <negge@mozilla.com>
Date: Sun, 6 Mar 2016 15:59:29 +0000 (-0500)
Subject: Add Daala entropy coder.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1078dee569bb2cd7ab52c4134787c32b669288c0;p=libvpx

Add Daala entropy coder.

Change-Id: I2849a50163268d58cc5d80aacfec1fd02299ca43
---

diff --git a/aom_dsp/aom_dsp.mk b/aom_dsp/aom_dsp.mk
index 04bf0480b..9db0fa797 100644
--- a/aom_dsp/aom_dsp.mk
+++ b/aom_dsp/aom_dsp.mk
@@ -52,6 +52,15 @@ endif
 # intra predictions
 DSP_SRCS-yes += intrapred.c
 
+ifeq ($(CONFIG_DAALA_EC),yes)
+DSP_SRCS-yes += entenc.c
+DSP_SRCS-yes += entenc.h
+DSP_SRCS-yes += entdec.c
+DSP_SRCS-yes += entdec.h
+DSP_SRCS-yes += entcode.c
+DSP_SRCS-yes += entcode.h
+endif  # CONFIG_DAALA_EC
+
 DSP_SRCS-$(HAVE_SSE) += x86/intrapred_sse2.asm
 DSP_SRCS-$(HAVE_SSE2) += x86/intrapred_sse2.asm
 DSP_SRCS-$(HAVE_SSSE3) += x86/intrapred_ssse3.asm
diff --git a/aom_dsp/entcode.c b/aom_dsp/entcode.c
new file mode 100644
index 000000000..37aaa0e2e
--- /dev/null
+++ b/aom_dsp/entcode.c
@@ -0,0 +1,84 @@
+/*Daala video codec
+Copyright (c) 2001-2012 Daala project contributors.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "entcode.h"
+
+/*CDFs for uniform distributions of sizes 2 through 16 (inclusive), stored
+   back to back: the table for size n has n entries and ends in 32768.*/
+const uint16_t OD_UNIFORM_CDFS_Q15[135] = {
+  16384, 32768, 10923, 21845, 32768, 8192,  16384, 24576, 32768, 6554,  13107,
+  19661, 26214, 32768, 5461,  10923, 16384, 21845, 27307, 32768, 4681,  9362,
+  14043, 18725, 23406, 28087, 32768, 4096,  8192,  12288, 16384, 20480, 24576,
+  28672, 32768, 3641,  7282,  10923, 14564, 18204, 21845, 25486, 29127, 32768,
+  3277,  6554,  9830,  13107, 16384, 19661, 22938, 26214, 29491, 32768, 2979,
+  5958,  8937,  11916, 14895, 17873, 20852, 23831, 26810, 29789, 32768, 2731,
+  5461,  8192,  10923, 13653, 16384, 19115, 21845, 24576, 27307, 30037, 32768,
+  2521,  5041,  7562,  10082, 12603, 15124, 17644, 20165, 22686, 25206, 27727,
+  30247, 32768, 2341,  4681,  7022,  9362,  11703, 14043, 16384, 18725, 21065,
+  23406, 25746, 28087, 30427, 32768, 2185,  4369,  6554,  8738,  10923, 13107,
+  15292, 17476, 19661, 21845, 24030, 26214, 28399, 30583, 32768, 2048,  4096,
+  6144,  8192,  10240, 12288, 14336, 16384, 18432, 20480, 22528, 24576, 26624,
+  28672, 30720, 32768
+};
+
+/*Given the current total integer number of bits used and the current value of
+   rng, computes the fractional number of bits used to OD_BITRES precision.
+  This is used by od_ec_enc_tell_frac() and od_ec_dec_tell_frac().
+  nbits_total: The number of whole bits currently used, i.e., the value
+                returned by od_ec_enc_tell() or od_ec_dec_tell().
+  rng: The current value of rng from either the encoder or decoder state.
+  Return: The number of bits scaled by 2**OD_BITRES.
+          This will always be slightly larger than the exact value (i.e., all
+           rounding error is in the positive direction).*/
+uint32_t od_ec_tell_frac(uint32_t nbits_total, uint32_t rng) {
+  uint32_t nbits;
+  int l;
+  int i;
+  /*To handle the non-integral number of bits still left in the encoder/decoder
+     state, we compute the worst-case number of bits of val that must be
+     encoded to ensure that the value is inside the range for any possible
+     subsequent bits.
+    The computation here is independent of val itself (the decoder does not
+     even track that value), even though the real number of bits used after
+     od_ec_enc_done() may be 1 smaller if rng is a power of two and the
+     corresponding trailing bits of val are all zeros.
+    If we did try to track that special case, then coding a value with a
+     probability of 1/(1 << n) might sometimes appear to use more than n bits.
+    This may help explain the surprising result that a newly initialized
+     encoder or decoder claims to have used 1 bit.*/
+  nbits = nbits_total << OD_BITRES;
+  l = 0;
+  for (i = OD_BITRES; i-- > 0;) { /*One fractional bit per pass.*/
+    int b;
+    rng = rng * rng >> 15; /*Square rng, then rescale.*/
+    b = (int)(rng >> 16); /*0 or 1, assuming rng < 65536 on entry.*/
+    l = l << 1 | b;
+    rng >>= b; /*Restores rng < 65536 when b is 1.*/
+  }
+  return nbits - l;
+}
diff --git a/aom_dsp/entcode.h b/aom_dsp/entcode.h
new file mode 100644
index 000000000..77ed17176
--- /dev/null
+++ b/aom_dsp/entcode.h
@@ -0,0 +1,118 @@
+/*Daala video codec
+Copyright (c) 2001-2013 Daala project contributors.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#if !defined(_entcode_H)
+#define _entcode_H (1)
+#include <limits.h>
+#include <stddef.h>
+#include "av1/common/odintrin.h"
+
+/*Set this to 1 to enable a "reduced overhead" version of the entropy coder.
+  This uses a partition function that more accurately follows the input
+   probability estimates at the expense of some additional CPU cost (though
+   still an order of magnitude less than a full division).
+
+  In classic arithmetic coding, the partition function maps a value x in the
+   range [0, ft] to a value y in [0, r] with 0 < ft <= r via
+    y = x*r/ft.
+  Any deviation from this value increases coding inefficiency.
+
+  To avoid divisions, we require ft <= r < 2*ft (enforcing it by shifting up
+   ft if necessary), and replace that function with
+    y = x + OD_MINI(x, r - ft).
+  This counts values of x smaller than r - ft double compared to values larger
+   than r - ft, which over-estimates the probability of symbols at the start of
+   the alphabet, and under-estimates the probability of symbols at the end of
+   the alphabet.
+  The overall coding inefficiency assuming accurate probability models and
+   independent symbols is in the 1% range, which is similar to that of CABAC.
+
+  To reduce overhead even further, we split this into two cases:
+  1) r - ft > ft - (r - ft).
+     That is, we have more values of x that are double-counted than
+      single-counted.
+     In this case, we still double-count the first 2*r - 3*ft values of x, but
+      after that we alternate between single-counting and double-counting for
+      the rest.
+  2) r - ft < ft - (r - ft).
+     That is, we have more values of x that are single-counted than
+      double-counted.
+     In this case, we alternate between single-counting and double-counting for
+      the first 2*(r - ft) values of x, and single-count the rest.
+  For two equiprobable symbols in different places in the alphabet, this
+   reduces the maximum ratio of over-estimation to under-estimation from 2:1
+   for the previous partition function to either 4:3 or 3:2 (for each of the
+   two cases above, respectively), assuming symbol probabilities significantly
+   greater than 1/32768.
+  That reduces the worst-case per-symbol overhead from 1 bit to 0.58 bits.
+
+  The resulting function is
+    e = OD_MAXI(2*r - 3*ft, 0);
+    y = x + OD_MINI(x, e) + OD_MINI(OD_MAXI(x - e, 0) >> 1, r - ft).
+  Here, e is a value that is greater than 0 in case 1, and 0 in case 2.
+  This function is about 3 times as expensive to evaluate as the high-overhead
+   version, but still an order of magnitude cheaper than a division, since it
+   is composed only of very simple operations.
+  Because we want to fit in 16-bit registers and must use unsigned values to do
+   so, we use saturating subtraction to enforce the maximums with 0.
+
+  Enabling this reduces the measured overhead in ectest from 0.805% to 0.621%
+   (vs. 0.022% for the division-based partition function with r much greater
+   than ft).
+  It improves performance on ntt-short-1 by about 0.3%.*/
+#define OD_EC_REDUCED_OVERHEAD (1)
+
+/*OPT: od_ec_window must be at least 32 bits, but if you have fast arithmetic
+   on a larger type, you can speed up the decoder by using it here.*/
+typedef uint32_t od_ec_window;
+/*The width of od_ec_window, in bits.*/
+#define OD_EC_WINDOW_SIZE ((int)sizeof(od_ec_window) * CHAR_BIT)
+
+/*Unsigned subtraction with unsigned saturation: a - b, or 0 if b exceeds a.
+  This implementation of the macro is intentionally chosen to increase the
+   number of common subexpressions in the reduced-overhead partition function.
+  This matters for C code, but it would not for hardware with a saturating
+   subtraction instruction.*/
+#define OD_SUBSATU(a, b) ((a)-OD_MINI(a, b))
+
+/*The number of bits to use for the range-coded part of unsigned integers.*/
+#define OD_EC_UINT_BITS (4)
+
+/*The resolution of fractional-precision bit usage measurements, i.e.,
+   3 => 1/8th bits.*/
+#define OD_BITRES (3)
+
+extern const uint16_t OD_UNIFORM_CDFS_Q15[135];
+
+/*Returns a Q15 CDF for a uniform probability distribution of the given size.
+  n: The size of the distribution.
+     This must be at least 2, and no more than 16; it is evaluated twice.*/
+#define OD_UNIFORM_CDF_Q15(n) (OD_UNIFORM_CDFS_Q15 + ((n) * ((n)-1) >> 1) - 1)
+
+/*See entcode.c for further documentation.*/
+
+OD_WARN_UNUSED_RESULT uint32_t od_ec_tell_frac(uint32_t nbits_total,
+                                               uint32_t rng);
+
+#endif
diff --git a/aom_dsp/entdec.c b/aom_dsp/entdec.c
new file mode 100644
index 000000000..4dcc0a1e6
--- /dev/null
+++ b/aom_dsp/entdec.c
@@ -0,0 +1,534 @@
+/*Daala video codec
+Copyright (c) 2001-2013 Daala project contributors.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "entdec.h"
+#if OD_ACCOUNTING
+#include "accounting.h"
+#endif
+
+/*A range decoder.
+  This is an entropy decoder based upon \cite{Mar79}, which is itself a
+   rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}.
+  It is very similar to arithmetic encoding, except that encoding is done with
+   digits in any base, instead of with bits, and so it is faster when using
+   larger bases (i.e.: a byte).
+  The author claims an average waste of $\frac{1}{2}\log_b(2b)$ bits, where $b$
+   is the base, longer than the theoretical optimum, but to my knowledge there
+   is no published justification for this claim.
+  This only seems true when using near-infinite precision arithmetic so that
+   the process is carried out with no rounding errors.
+
+  An excellent description of implementation details is available at
+   http://www.arturocampos.com/ac_range.html
+  A recent work \cite{MNW98} which proposes several changes to arithmetic
+   encoding for efficiency actually re-discovers many of the principles
+   behind range encoding, and presents a good theoretical analysis of them.
+
+  End of stream is handled by writing out the smallest number of bits that
+   ensures that the stream will be correctly decoded regardless of the value of
+   any subsequent bits.
+  od_ec_dec_tell() can be used to determine how many bits were needed to decode
+   all the symbols thus far; other data can be packed in the remaining bits of
+   the input buffer.
+  @PHDTHESIS{Pas76,
+    author="Richard Clark Pasco",
+    title="Source coding algorithms for fast data compression",
+    school="Dept. of Electrical Engineering, Stanford University",
+    address="Stanford, CA",
+    month=May,
+    year=1976,
+    URL="http://www.richpasco.org/scaffdc.pdf"
+  }
+  @INPROCEEDINGS{Mar79,
+   author="Martin, G.N.N.",
+   title="Range encoding: an algorithm for removing redundancy from a digitised
+    message",
+   booktitle="Video & Data Recording Conference",
+   year=1979,
+   address="Southampton",
+   month=Jul,
+   URL="http://www.compressconsult.com/rangecoder/rngcod.pdf.gz"
+  }
+  @ARTICLE{MNW98,
+   author="Alistair Moffat and Radford Neal and Ian H. Witten",
+   title="Arithmetic Coding Revisited",
+   journal="{ACM} Transactions on Information Systems",
+   year=1998,
+   volume=16,
+   number=3,
+   pages="256--294",
+   month=Jul,
+   URL="http://researchcommons.waikato.ac.nz/bitstream/handle/10289/78/content.pdf"
+  }*/
+
+#if OD_ACCOUNTING
+#define od_ec_dec_normalize(dec, dif, rng, ret, str) \
+  od_ec_dec_normalize_(dec, dif, rng, ret, str)
+static void od_process_accounting(od_ec_dec *dec, char *str) {
+  if (dec->acct != NULL) { /*od_ec_dec_init() leaves acct NULL.*/
+    uint32_t tell;
+    tell = od_ec_dec_tell_frac(dec); /*Bits used, in 1/8th bit units.*/
+    OD_ASSERT(tell >= dec->acct->last_tell);
+    od_accounting_record(dec->acct, str, tell - dec->acct->last_tell);
+    dec->acct->last_tell = tell; /*The next delta is measured from here.*/
+  }
+}
+#else /*!OD_ACCOUNTING: the label argument str is dropped.*/
+#define od_ec_dec_normalize(dec, dif, rng, ret, str) \
+  od_ec_dec_normalize_(dec, dif, rng, ret)
+#endif /*OD_ACCOUNTING*/
+
+/*A large, positive bit count stored in cnt or nend_bits once the input runs
+   out; it fits in int16_t and loads cheaply as an immediate (e.g., on ARM).
+  Even relatively modest values like 100 would work fine.*/
+#define OD_EC_LOTS_OF_BITS (0x4000)
+
+static void od_ec_dec_refill(od_ec_dec *dec) { /*Loads bytes into dif.*/
+  int s; /*Left shift for the next input byte.*/
+  od_ec_window dif;
+  int16_t cnt;
+  const unsigned char *bptr;
+  const unsigned char *end;
+  dif = dec->dif;
+  cnt = dec->cnt;
+  bptr = dec->bptr;
+  end = dec->end;
+  s = OD_EC_WINDOW_SIZE - 9 - (cnt + 15);
+  for (; s >= 0 && bptr < end; s -= 8, bptr++) { /*One byte per pass.*/
+    OD_ASSERT(s <= OD_EC_WINDOW_SIZE - 8);
+    dif |= (od_ec_window)bptr[0] << s;
+    cnt += 8;
+  }
+  if (bptr >= end) { /*All input bytes have been consumed.*/
+    dec->tell_offs += OD_EC_LOTS_OF_BITS - cnt; /*Cancels cnt's jump.*/
+    cnt = OD_EC_LOTS_OF_BITS;
+  }
+  dec->dif = dif;
+  dec->cnt = cnt;
+  dec->bptr = bptr;
+}
+
+/*Takes updated dif and range values, renormalizes them so that
+   32768 <= rng < 65536 (reading more bytes from the stream into dif if
+   necessary), and stores them back in the decoder context.
+  dif: The new value of dif.
+  rng: The new value of the range.
+  ret: The value to return.
+  Return: ret.
+          This allows the compiler to jump to this function via a tail-call.*/
+static int od_ec_dec_normalize_(od_ec_dec *dec, od_ec_window dif, unsigned rng,
+                                int ret OD_ACC_STR) {
+  int d; /*Left shift applied to both dif and rng.*/
+  OD_ASSERT(rng <= 65535U);
+  d = 16 - OD_ILOG_NZ(rng);
+  dec->cnt -= d;
+  dec->dif = dif << d;
+  dec->rng = rng << d;
+  if (dec->cnt < 0) od_ec_dec_refill(dec); /*Read more input.*/
+#if OD_ACCOUNTING
+  od_process_accounting(dec, acc_str);
+#endif
+  return ret;
+}
+
+/*Initializes the decoder.
+  buf: The input buffer to use.
+  storage: The size of the input buffer, in bytes.*/
+void od_ec_dec_init(od_ec_dec *dec, const unsigned char *buf,
+                    uint32_t storage) {
+  dec->buf = buf;
+  dec->eptr = buf + storage; /*od_ec_dec_bits_() reads backwards from here.*/
+  dec->end_window = 0;
+  dec->nend_bits = 0;
+  dec->tell_offs = 10 - (OD_EC_WINDOW_SIZE - 8);
+  dec->end = buf + storage;
+  dec->bptr = buf; /*od_ec_dec_refill() reads forwards from here.*/
+  dec->dif = 0;
+  dec->rng = 0x8000;
+  dec->cnt = -15;
+  dec->error = 0;
+  od_ec_dec_refill(dec); /*Preload dif with the first input bytes.*/
+#if OD_ACCOUNTING
+  dec->acct = NULL;
+#endif
+}
+
+/*Decode a bit that has an fz/ft probability of being a zero.
+  fz: The probability that the bit is zero, scaled by ft.
+  ft: The total probability.
+      This must be at least 16384 and no more than 32768.
+  Return: The value decoded (0 or 1).*/
+int od_ec_decode_bool_(od_ec_dec *dec, unsigned fz, unsigned ft OD_ACC_STR) {
+  od_ec_window dif;
+  od_ec_window vw;
+  unsigned r;
+  int s;
+  unsigned v;
+  int ret;
+  OD_ASSERT(0 < fz);
+  OD_ASSERT(fz < ft);
+  OD_ASSERT(16384 <= ft);
+  OD_ASSERT(ft <= 32768U);
+  dif = dec->dif;
+  r = dec->rng;
+  OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
+  OD_ASSERT(ft <= r);
+  s = r - ft >= ft; /*1 iff ft and fz must be doubled to get r < 2*ft.*/
+  ft <<= s;
+  fz <<= s;
+  OD_ASSERT(r - ft < ft);
+#if OD_EC_REDUCED_OVERHEAD
+  {
+    unsigned d;
+    unsigned e;
+    d = r - ft;
+    e = OD_SUBSATU(2 * d, ft);
+    v = fz + OD_MINI(fz, e) + OD_MINI(OD_SUBSATU(fz, e) >> 1, d);
+  }
+#else
+  v = fz + OD_MINI(fz, r - ft);
+#endif
+  vw = (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16);
+  ret = dif >= vw; /*1 iff dif is at or above the split point.*/
+  if (ret) dif -= vw;
+  r = ret ? r - v : v; /*Width of the selected sub-range.*/
+  return od_ec_dec_normalize(dec, dif, r, ret, acc_str);
+}
+
+/*Decode a bit that has an fz probability of being a zero in Q15.
+  This is a simpler, lower overhead version of od_ec_decode_bool() for use when
+   ft == 32768.
+  To be decoded properly by this function, symbols cannot have been encoded by
+   od_ec_encode(), but must have been encoded with one of the equivalent _q15()
+   or _dyadic() functions instead.
+  fz: The probability that the bit is zero, scaled by 32768.
+  Return: The value decoded (0 or 1).*/
+int od_ec_decode_bool_q15_(od_ec_dec *dec, unsigned fz OD_ACC_STR) {
+  od_ec_window dif;
+  od_ec_window vw;
+  unsigned r;
+  unsigned v;
+  int ret;
+  OD_ASSERT(0 < fz);
+  OD_ASSERT(fz < 32768U);
+  dif = dec->dif;
+  r = dec->rng;
+  OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
+  OD_ASSERT(32768U <= r);
+  v = fz * (uint32_t)r >> 15; /*Split point: r * fz / 32768, rounded down.*/
+  vw = (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16);
+  ret = dif >= vw; /*1 iff dif is at or above the split point.*/
+  if (ret) dif -= vw;
+  r = ret ? r - v : v; /*Width of the selected sub-range.*/
+  return od_ec_dec_normalize(dec, dif, r, ret, acc_str);
+}
+
+/*Decodes a symbol given a cumulative distribution function (CDF) table.
+  cdf: The CDF, such that symbol s falls in the range
+        [s > 0 ? cdf[s - 1] : 0, cdf[s]).
+       The values must be monotonically non-decreasing, and cdf[nsyms - 1]
+        must be at least 16384, and no more than 32768.
+  nsyms: The number of symbols in the alphabet.
+         This should be at most 16.
+  Return: The decoded symbol s.*/
+int od_ec_decode_cdf_(od_ec_dec *dec, const uint16_t *cdf,
+                      int nsyms OD_ACC_STR) {
+  od_ec_window dif;
+  unsigned r;
+  unsigned c;
+  unsigned d;
+#if OD_EC_REDUCED_OVERHEAD
+  unsigned e;
+#endif
+  int s;
+  unsigned u;
+  unsigned v;
+  unsigned q;
+  unsigned fl;
+  unsigned fh;
+  unsigned ft;
+  int ret;
+  dif = dec->dif;
+  r = dec->rng;
+  OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
+  OD_ASSERT(nsyms > 0);
+  ft = cdf[nsyms - 1];
+  OD_ASSERT(16384 <= ft);
+  OD_ASSERT(ft <= 32768U);
+  OD_ASSERT(ft <= r);
+  s = r - ft >= ft; /*1 iff ft must be doubled to get r < 2*ft.*/
+  ft <<= s;
+  d = r - ft;
+  OD_ASSERT(d < ft);
+  c = (unsigned)(dif >> (OD_EC_WINDOW_SIZE - 16)); /*Top 16 bits of dif.*/
+  q = OD_MAXI((int)(c >> 1), (int)(c - d));
+#if OD_EC_REDUCED_OVERHEAD
+  e = OD_SUBSATU(2 * d, ft);
+  /*The correctness of this inverse partition function is not obvious, but it
+     was checked exhaustively for all possible values of r, ft, and c.
+    TODO: It should be possible to optimize this better than the compiler,
+     given that we do not care about the accuracy of negative results (as we
+     will not use them).
+    It would also be nice to get rid of the 32-bit dividend, as it requires a
+     32x32->64 bit multiply to invert.*/
+  q = OD_MAXI((int)q, (int)((2 * (int32_t)c + 1 - (int32_t)e) / 3));
+#endif
+  q >>= s; /*Back to the scale of the cdf entries.*/
+  OD_ASSERT(q<ft>> s);
+  fl = 0;
+  ret = 0;
+  for (fh = cdf[ret]; fh <= q; fh = cdf[++ret]) fl = fh; /*Linear search.*/
+  OD_ASSERT(fh <= ft >> s);
+  fl <<= s;
+  fh <<= s;
+#if OD_EC_REDUCED_OVERHEAD
+  u = fl + OD_MINI(fl, e) + OD_MINI(OD_SUBSATU(fl, e) >> 1, d);
+  v = fh + OD_MINI(fh, e) + OD_MINI(OD_SUBSATU(fh, e) >> 1, d);
+#else
+  u = fl + OD_MINI(fl, d);
+  v = fh + OD_MINI(fh, d);
+#endif
+  r = v - u; /*Width of the decoded symbol's sub-range.*/
+  dif -= (od_ec_window)u << (OD_EC_WINDOW_SIZE - 16);
+  return od_ec_dec_normalize(dec, dif, r, ret, acc_str);
+}
+
+/*Decodes a symbol given a cumulative distribution function (CDF) table.
+  cdf: The CDF, such that symbol s falls in the range
+        [s > 0 ? cdf[s - 1] : 0, cdf[s]).
+       The values must be monotonically non-decreasing, and cdf[nsyms - 1]
+       must be at least 2, and no more than 32768.
+  nsyms: The number of symbols in the alphabet.
+         This should be at most 16.
+  Return: The decoded symbol s.*/
+int od_ec_decode_cdf_unscaled_(od_ec_dec *dec, const uint16_t *cdf,
+                               int nsyms OD_ACC_STR) {
+  od_ec_window dif;
+  unsigned r;
+  unsigned c;
+  unsigned d;
+#if OD_EC_REDUCED_OVERHEAD
+  unsigned e;
+#endif
+  int s;
+  unsigned u;
+  unsigned v;
+  unsigned q;
+  unsigned fl;
+  unsigned fh;
+  unsigned ft;
+  int ret;
+  dif = dec->dif;
+  r = dec->rng;
+  OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
+  OD_ASSERT(nsyms > 0);
+  ft = cdf[nsyms - 1];
+  OD_ASSERT(2 <= ft);
+  OD_ASSERT(ft <= 32768U);
+  s = 15 - OD_ILOG_NZ(ft - 1); /*Scale-up shift applied to ft below.*/
+  ft <<= s;
+  OD_ASSERT(ft <= r);
+  if (r - ft >= ft) { /*Double ft once more so that r < 2*ft.*/
+    ft <<= 1;
+    s++;
+  }
+  d = r - ft;
+  OD_ASSERT(d < ft);
+  c = (unsigned)(dif >> (OD_EC_WINDOW_SIZE - 16)); /*Top 16 bits of dif.*/
+  q = OD_MAXI((int)(c >> 1), (int)(c - d));
+#if OD_EC_REDUCED_OVERHEAD
+  e = OD_SUBSATU(2 * d, ft);
+  /*TODO: See TODO above.*/
+  q = OD_MAXI((int)q, (int)((2 * (int32_t)c + 1 - (int32_t)e) / 3));
+#endif
+  q >>= s; /*Back to the scale of the cdf entries.*/
+  OD_ASSERT(q<ft>> s);
+  fl = 0;
+  ret = 0;
+  for (fh = cdf[ret]; fh <= q; fh = cdf[++ret]) fl = fh; /*Linear search.*/
+  OD_ASSERT(fh <= ft >> s);
+  fl <<= s;
+  fh <<= s;
+#if OD_EC_REDUCED_OVERHEAD
+  u = fl + OD_MINI(fl, e) + OD_MINI(OD_SUBSATU(fl, e) >> 1, d);
+  v = fh + OD_MINI(fh, e) + OD_MINI(OD_SUBSATU(fh, e) >> 1, d);
+#else
+  u = fl + OD_MINI(fl, d);
+  v = fh + OD_MINI(fh, d);
+#endif
+  r = v - u; /*Width of the decoded symbol's sub-range.*/
+  dif -= (od_ec_window)u << (OD_EC_WINDOW_SIZE - 16);
+  return od_ec_dec_normalize(dec, dif, r, ret, acc_str);
+}
+
+/*Decodes a symbol given a cumulative distribution function (CDF) table that
+   sums to a power of two.
+  This is a simpler, lower overhead version of od_ec_decode_cdf() for use when
+   cdf[nsyms - 1] is a power of two.
+  To be decoded properly by this function, symbols cannot have been encoded by
+   od_ec_encode(), but must have been encoded with one of the equivalent _q15()
+   functions instead.
+  cdf: The CDF, such that symbol s falls in the range
+        [s > 0 ? cdf[s - 1] : 0, cdf[s]).
+       The values must be monotonically non-decreasing, and cdf[nsyms - 1]
+       must be exactly 1 << ftb.
+  nsyms: The number of symbols in the alphabet.
+         This should be at most 16.
+  ftb: The number of bits of precision in the cumulative distribution.
+       This must be no more than 15.
+  Return: The decoded symbol s.*/
+int od_ec_decode_cdf_unscaled_dyadic_(od_ec_dec *dec, const uint16_t *cdf,
+                                      int nsyms, unsigned ftb OD_ACC_STR) {
+  od_ec_window dif;
+  unsigned r;
+  unsigned c;
+  unsigned u;
+  unsigned v;
+  int ret;
+  (void)nsyms; /*Only read by an assertion below.*/
+  dif = dec->dif;
+  r = dec->rng;
+  OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
+  OD_ASSERT(ftb <= 15);
+  OD_ASSERT(cdf[nsyms - 1] == 1U << ftb);
+  OD_ASSERT(32768U <= r);
+  c = (unsigned)(dif >> (OD_EC_WINDOW_SIZE - 16)); /*Top 16 bits of dif.*/
+  v = 0;
+  ret = -1; /*Pre-incremented to 0 on the first pass.*/
+  do {
+    u = v;
+    v = cdf[++ret] * (uint32_t)r >> ftb;
+  } while (v <= c);
+  OD_ASSERT(v <= r);
+  r = v - u; /*Width of the decoded symbol's sub-range.*/
+  dif -= (od_ec_window)u << (OD_EC_WINDOW_SIZE - 16);
+  return od_ec_dec_normalize(dec, dif, r, ret, acc_str);
+}
+
+/*Decodes a symbol given a cumulative distribution function (CDF) table in Q15.
+  This is a simpler, lower overhead version of od_ec_decode_cdf() for use when
+   cdf[nsyms - 1] == 32768.
+  To be decoded properly by this function, symbols cannot have been encoded by
+   od_ec_encode(), but must have been encoded with one of the equivalent _q15()
+   or _dyadic() functions instead.
+  cdf: The CDF, such that symbol s falls in the range
+        [s > 0 ? cdf[s - 1] : 0, cdf[s]).
+       The values must be monotonically non-decreasing, and cdf[nsyms - 1]
+        must be 32768.
+  nsyms: The number of symbols in the alphabet.
+         This should be at most 16.
+  Return: The decoded symbol s.*/
+int od_ec_decode_cdf_q15_(od_ec_dec *dec, const uint16_t *cdf,
+                          int nsyms OD_ACC_STR) {
+  return od_ec_decode_cdf_unscaled_dyadic(dec, cdf, nsyms, 15, acc_str);
+}
+
+/*Extracts a raw unsigned integer with a non-power-of-2 range from the stream.
+  The integer must have been encoded with od_ec_enc_uint().
+  ft: The number of integers that can be decoded (one more than the max).
+      This must be at least 2, and no more than 2**29.
+  Return: The decoded integer, or ft - 1 with dec->error set if out of range.*/
+uint32_t od_ec_dec_uint_(od_ec_dec *dec, uint32_t ft OD_ACC_STR) {
+  OD_ASSERT(ft >= 2);
+  OD_ASSERT(ft <= (uint32_t)1 << (25 + OD_EC_UINT_BITS));
+  if (ft > 1U << OD_EC_UINT_BITS) {
+    uint32_t t;
+    int ft1;
+    int ftb;
+    ft--; /*ft is now the largest decodable value.*/
+    ftb = OD_ILOG_NZ(ft) - OD_EC_UINT_BITS; /*Low bits read as raw bits.*/
+    ft1 = (int)(ft >> ftb) + 1; /*Alphabet size for the high part.*/
+    t = od_ec_decode_cdf_q15(dec, OD_UNIFORM_CDF_Q15(ft1), ft1, acc_str);
+    t = t << ftb | od_ec_dec_bits(dec, ftb, acc_str);
+    if (t <= ft) return t;
+    dec->error = 1; /*t exceeds the maximum; flag it and clamp.*/
+    return ft;
+  }
+  return od_ec_decode_cdf_q15(dec, OD_UNIFORM_CDF_Q15(ft), (int)ft, acc_str);
+}
+
+/*Extracts a sequence of raw bits from the stream.
+  The bits must have been encoded with od_ec_enc_bits().
+  ftb: The number of bits to extract.
+       This must be between 0 and 25, inclusive.
+  Return: The decoded bits.*/
+uint32_t od_ec_dec_bits_(od_ec_dec *dec, unsigned ftb OD_ACC_STR) {
+  od_ec_window window;
+  int available;
+  uint32_t ret;
+  OD_ASSERT(ftb <= 25);
+  window = dec->end_window;
+  available = dec->nend_bits;
+  if ((unsigned)available < ftb) { /*Too few bits buffered; read more.*/
+    const unsigned char *buf;
+    const unsigned char *eptr;
+    buf = dec->buf;
+    eptr = dec->eptr;
+    OD_ASSERT(available <= OD_EC_WINDOW_SIZE - 8);
+    do { /*Each byte is placed above the bits already held.*/
+      if (eptr <= buf) { /*No bytes left to read.*/
+        dec->tell_offs += OD_EC_LOTS_OF_BITS - available;
+        available = OD_EC_LOTS_OF_BITS;
+        break;
+      }
+      window |= (od_ec_window) * --eptr << available;
+      available += 8;
+    } while (available <= OD_EC_WINDOW_SIZE - 8);
+    dec->eptr = eptr;
+  }
+  ret = (uint32_t)window & (((uint32_t)1 << ftb) - 1); /*Low ftb bits.*/
+  window >>= ftb;
+  available -= ftb;
+  dec->end_window = window;
+  dec->nend_bits = available;
+#if OD_ACCOUNTING
+  od_process_accounting(dec, acc_str);
+#endif
+  return ret;
+}
+
+/*Returns the number of bits "used" by the decoded symbols so far.
+  This same number can be computed in either the encoder or the decoder, and is
+   suitable for making coding decisions.
+  Return: The number of bits.
+          This will always be slightly larger than the exact value (i.e., all
+           rounding error is in the positive direction).*/
+int od_ec_dec_tell(od_ec_dec *dec) {
+  return ((dec->end - dec->eptr) + (dec->bptr - dec->buf)) * 8 - dec->cnt -
+         dec->nend_bits + dec->tell_offs;
+}
+
+/*Returns the number of bits "used" so far, with OD_BITRES fractional bits.
+  This same number can be computed in either the encoder or the decoder, and is
+   suitable for making coding decisions.
+  Return: The number of bits scaled by 2**OD_BITRES.
+          This will always be slightly larger than the exact value (i.e., all
+           rounding error is in the positive direction).*/
+uint32_t od_ec_dec_tell_frac(od_ec_dec *dec) {
+  return od_ec_tell_frac(od_ec_dec_tell(dec), dec->rng);
+}
diff --git a/aom_dsp/entdec.h b/aom_dsp/entdec.h
new file mode 100644
index 000000000..110b9daf2
--- /dev/null
+++ b/aom_dsp/entdec.h
@@ -0,0 +1,143 @@
+/*Daala video codec
+Copyright (c) 2001-2013 Daala project contributors.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#if !defined(_entdec_H)
+#define _entdec_H (1)
+#include <limits.h>
+#include "entcode.h"
+#if OD_ACCOUNTING
+#include "accounting.h"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct od_ec_dec od_ec_dec;
+
+#if OD_ACCOUNTING /*Accounting builds: wrappers forward the label string.*/
+#define OD_ACC_STR , char *acc_str
+#define od_ec_decode_bool(dec, fz, ft, str) od_ec_decode_bool_(dec, fz, ft, str)
+#define od_ec_decode_bool_q15(dec, fz, str) od_ec_decode_bool_q15_(dec, fz, str)
+#define od_ec_decode_cdf(dec, cdf, nsyms, str) \
+  od_ec_decode_cdf_(dec, cdf, nsyms, str)
+#define od_ec_decode_cdf_q15(dec, cdf, nsyms, str) \
+  od_ec_decode_cdf_q15_(dec, cdf, nsyms, str)
+#define od_ec_decode_cdf_unscaled(dec, cdf, nsyms, str) \
+  od_ec_decode_cdf_unscaled_(dec, cdf, nsyms, str)
+#define od_ec_decode_cdf_unscaled_dyadic(dec, cdf, nsyms, ftb, str) \
+  od_ec_decode_cdf_unscaled_dyadic_(dec, cdf, nsyms, ftb, str)
+#define od_ec_dec_uint(dec, ft, str) od_ec_dec_uint_(dec, ft, str)
+#define od_ec_dec_bits(dec, ftb, str) od_ec_dec_bits_(dec, ftb, str)
+#else /*No accounting: OD_ACC_STR is empty and the wrappers drop str.*/
+#define OD_ACC_STR
+#define od_ec_decode_bool(dec, fz, ft, str) od_ec_decode_bool_(dec, fz, ft)
+#define od_ec_decode_bool_q15(dec, fz, str) od_ec_decode_bool_q15_(dec, fz)
+#define od_ec_decode_cdf(dec, cdf, nsyms, str) \
+  od_ec_decode_cdf_(dec, cdf, nsyms)
+#define od_ec_decode_cdf_q15(dec, cdf, nsyms, str) \
+  od_ec_decode_cdf_q15_(dec, cdf, nsyms)
+#define od_ec_decode_cdf_unscaled(dec, cdf, nsyms, str) \
+  od_ec_decode_cdf_unscaled_(dec, cdf, nsyms)
+#define od_ec_decode_cdf_unscaled_dyadic(dec, cdf, nsyms, ftb, str) \
+  od_ec_decode_cdf_unscaled_dyadic_(dec, cdf, nsyms, ftb)
+#define od_ec_dec_uint(dec, ft, str) od_ec_dec_uint_(dec, ft)
+#define od_ec_dec_bits(dec, ftb, str) od_ec_dec_bits_(dec, ftb)
+#endif /*OD_ACCOUNTING*/
+
+/*The entropy decoder context.*/
+struct od_ec_dec {
+  /*The start of the current input buffer.*/
+  const unsigned char *buf;
+  /*The read pointer for the raw bits; it is decremented towards buf.*/
+  const unsigned char *eptr;
+  /*Raw bits loaded from the end of the buffer that are yet to be returned.*/
+  od_ec_window end_window;
+  /*Number of valid bits in end_window.*/
+  int nend_bits;
+  /*An offset used to keep track of tell after reaching the end of the stream.
+    This is constant throughout most of the decoding process, but becomes
+     important once we hit the end of the buffer and stop incrementing pointers
+     (and instead pretend cnt/nend_bits have lots of bits).*/
+  int32_t tell_offs;
+  /*The end of the current input buffer.*/
+  const unsigned char *end;
+  /*The read pointer for the entropy-coded bits.*/
+  const unsigned char *bptr;
+  /*The difference between the coded value and the low end of the current
+     range.*/
+  od_ec_window dif;
+  /*The number of values in the current range.*/
+  uint16_t rng;
+  /*The number of bits of data in the current value.*/
+  int16_t cnt;
+  /*Nonzero if an error occurred.*/
+  int error;
+#if OD_ACCOUNTING /*Only present in accounting builds.*/
+  od_accounting_internal *acct;
+#endif
+};
+
+/*Public decoder API; see entdec.c for per-function documentation.*/
+
+void od_ec_dec_init(od_ec_dec *dec, const unsigned char *buf, uint32_t storage)
+    OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
+
+OD_WARN_UNUSED_RESULT int od_ec_decode_bool_(od_ec_dec *dec, unsigned fz,
+                                             unsigned ft OD_ACC_STR)
+    OD_ARG_NONNULL(1);
+OD_WARN_UNUSED_RESULT int od_ec_decode_bool_q15_(od_ec_dec *dec,
+                                                 unsigned fz OD_ACC_STR)
+    OD_ARG_NONNULL(1);
+OD_WARN_UNUSED_RESULT int od_ec_decode_cdf_(od_ec_dec *dec, const uint16_t *cdf,
+                                            int nsyms OD_ACC_STR)
+    OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
+OD_WARN_UNUSED_RESULT int od_ec_decode_cdf_q15_(od_ec_dec *dec,
+                                                const uint16_t *cdf,
+                                                int nsyms OD_ACC_STR)
+    OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
+OD_WARN_UNUSED_RESULT int od_ec_decode_cdf_unscaled_(od_ec_dec *dec,
+                                                     const uint16_t *cdf,
+                                                     int nsyms OD_ACC_STR)
+    OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
+OD_WARN_UNUSED_RESULT int od_ec_decode_cdf_unscaled_dyadic_(
+    od_ec_dec *dec, const uint16_t *cdf, int nsyms, unsigned ftb OD_ACC_STR)
+    OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
+
+OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_uint_(od_ec_dec *dec,
+                                               uint32_t ft OD_ACC_STR)
+    OD_ARG_NONNULL(1);
+
+OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_bits_(od_ec_dec *dec,
+                                               unsigned ftb OD_ACC_STR)
+    OD_ARG_NONNULL(1);
+
+OD_WARN_UNUSED_RESULT int od_ec_dec_tell(od_ec_dec *dec) OD_ARG_NONNULL(1);
+uint32_t od_ec_dec_tell_frac(od_ec_dec *dec) OD_ARG_NONNULL(1);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif
diff --git a/aom_dsp/entenc.c b/aom_dsp/entenc.c
new file mode 100644
index 000000000..564086b49
--- /dev/null
+++ b/aom_dsp/entenc.c
@@ -0,0 +1,698 @@
+/*Daala video codec
+Copyright (c) 2001-2013 Daala project contributors.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include "entenc.h"
+
+/*A range encoder.
+  See entdec.c and the references for implementation details \cite{Mar79,MNW98}.
+
+  @INPROCEEDINGS{Mar79,
+   author="Martin, G.N.N.",
+   title="Range encoding: an algorithm for removing redundancy from a digitised
+    message",
+   booktitle="Video \& Data Recording Conference",
+   year=1979,
+   address="Southampton",
+   month=Jul,
+   URL="http://www.compressconsult.com/rangecoder/rngcod.pdf.gz"
+  }
+  @ARTICLE{MNW98,
+   author="Alistair Moffat and Radford Neal and Ian H. Witten",
+   title="Arithmetic Coding Revisited",
+   journal="{ACM} Transactions on Information Systems",
+   year=1998,
+   volume=16,
+   number=3,
+   pages="256--294",
+   month=Jul,
+   URL="http://researchcommons.waikato.ac.nz/bitstream/handle/10289/78/content.pdf"
+  }*/
+
+/*Renormalizes the updated low/rng so that 32768 <= rng < 65536 (flushing bytes
+   from low to the pre-carry buffer if necessary) and stores them in enc.
+  If growing that buffer fails, sets enc->error = -1 and enc->offs = 0, and
+   returns without storing low/rng/cnt.
+  low: The new value of low.  rng: The new value of the range.*/
+static void od_ec_enc_normalize(od_ec_enc *enc, od_ec_window low,
+                                unsigned rng) {
+  int d;
+  int c;
+  int s;
+  c = enc->cnt;
+  OD_ASSERT(rng <= 65535U);
+  d = 16 - OD_ILOG_NZ(rng); /*Shift applied to low and rng below.*/
+  s = c + d; /*Prospective cnt; a value >= 0 forces a flush.*/
+  /*TODO: Right now we flush every time we have at least one byte available.
+    Instead we should use an od_ec_window and flush right before we're about to
+     shift bits off the end of the window.
+    For a 32-bit window this is about the same amount of work, but for a 64-bit
+     window it should be a fair win.*/
+  if (s >= 0) {
+    uint16_t *buf;
+    uint32_t storage;
+    uint32_t offs;
+    unsigned m;
+    buf = enc->precarry_buf;
+    storage = enc->precarry_storage;
+    offs = enc->offs;
+    if (offs + 2 > storage) { /*Up to two entries are written below.*/
+      storage = 2 * storage + 2;
+      buf = (uint16_t *)realloc(buf, sizeof(*buf) * storage);
+      if (buf == NULL) {
+        enc->error = -1;
+        enc->offs = 0;
+        return; /*low, rng and cnt are not stored on this path.*/
+      }
+      enc->precarry_buf = buf;
+      enc->precarry_storage = storage;
+    }
+    c += 16;
+    m = (1 << c) - 1; /*Mask of the bits of low that stay behind.*/
+    if (s >= 8) { /*Two entries are due: emit the upper one first.*/
+      OD_ASSERT(offs < storage);
+      buf[offs++] = (uint16_t)(low >> c);
+      low &= m;
+      c -= 8;
+      m >>= 8;
+    }
+    OD_ASSERT(offs < storage);
+    buf[offs++] = (uint16_t)(low >> c);
+    s = c + d - 24;
+    low &= m;
+    enc->offs = offs;
+  }
+  enc->low = low << d;
+  enc->rng = rng << d;
+  enc->cnt = s;
+}
+
+/*Initializes the encoder: resets the coding state and allocates both buffers.
+  size: The initial capacity of the output buffer, in bytes; the pre-carry
+         buffer is given the same number of 16-bit entries.
+  If either allocation fails for a non-zero size, enc->error is set to -1 and
+   the storage recorded for that buffer is 0.*/
+void od_ec_enc_init(od_ec_enc *enc, uint32_t size) {
+  unsigned char *out;
+  uint16_t *precarry;
+  od_ec_enc_reset(enc);
+  out = (unsigned char *)malloc(size * sizeof(*out));
+  precarry = (uint16_t *)malloc(size * sizeof(*precarry));
+  enc->buf = out;
+  enc->storage = out != NULL ? size : 0;
+  enc->precarry_buf = precarry;
+  enc->precarry_storage = precarry != NULL ? size : 0;
+  if (size > 0 && (out == NULL || precarry == NULL)) enc->error = -1;
+}
+
+/*Puts the encoder back in its initial coding state; buffers are kept as is.*/
+void od_ec_enc_reset(od_ec_enc *enc) {
+  enc->low = 0;
+  enc->rng = 0x8000;
+  /*Starting cnt at -9 makes it cross zero once one byte plus one carry bit
+     have been accumulated.*/
+  enc->cnt = -9;
+  enc->offs = 0;
+  enc->nend_bits = 0;
+  enc->end_window = 0;
+  enc->end_offs = 0;
+  enc->error = 0;
+#if OD_MEASURE_EC_OVERHEAD
+  enc->nb_symbols = 0;
+  enc->entropy = 0;
+#endif
+}
+
+/*Releases the output and pre-carry buffers owned by the encoder.*/
+void od_ec_enc_clear(od_ec_enc *enc) {
+  free(enc->buf);
+  free(enc->precarry_buf);
+}
+
+/*Encodes a symbol given its scaled frequency information.
+  The frequency information must be discernible by the decoder, assuming it
+   has read only the previous symbols from the stream.
+  You can change the frequency information, or even the entire source alphabet,
+   so long as the decoder can tell from the context of the previously encoded
+   information that it is supposed to do so as well.
+  fl: The cumulative frequency of all symbols that come before the one to be
+       encoded.
+  fh: The cumulative frequency of all symbols up to and including the one to
+       be encoded.
+      Together with fl, this defines the range [fl, fh) in which the decoded
+       value will fall.
+  ft: The sum of the frequencies of all the symbols.
+      This must be at least 16384, no more than 32768, and at most enc->rng.*/
+static void od_ec_encode(od_ec_enc *enc, unsigned fl, unsigned fh,
+                         unsigned ft) {
+  od_ec_window l;
+  unsigned r;
+  int s;
+  unsigned d;
+  unsigned u;
+  unsigned v;
+  OD_ASSERT(fl < fh);
+  OD_ASSERT(fh <= ft);
+  OD_ASSERT(16384 <= ft);
+  OD_ASSERT(ft <= 32768U);
+  l = enc->low;
+  r = enc->rng;
+  OD_ASSERT(ft <= r);
+  s = r - ft >= ft; /*1 if ft can be doubled without exceeding r.*/
+  ft <<= s;
+  fl <<= s;
+  fh <<= s;
+  d = r - ft; /*Part of the range left over beyond the scaled ft.*/
+  OD_ASSERT(d < ft);
+#if OD_EC_REDUCED_OVERHEAD
+  {
+    unsigned e;
+    e = OD_SUBSATU(2 * d, ft);
+    u = fl + OD_MINI(fl, e) + OD_MINI(OD_SUBSATU(fl, e) >> 1, d);
+    v = fh + OD_MINI(fh, e) + OD_MINI(OD_SUBSATU(fh, e) >> 1, d);
+  }
+#else
+  u = fl + OD_MINI(fl, d);
+  v = fh + OD_MINI(fh, d);
+#endif
+  r = v - u; /*u and v are fl and fh mapped into the range.*/
+  l += u;
+  od_ec_enc_normalize(enc, l, r);
+#if OD_MEASURE_EC_OVERHEAD
+  enc->entropy -= OD_LOG2((double)(fh - fl) / ft);
+  enc->nb_symbols++;
+#endif
+}
+
+/*Encodes a symbol whose cumulative frequencies are given in Q15, i.e. with an
+   implied total of 32768.
+  This behaves like od_ec_encode() with ft == 32768, but is simpler and has
+   lower overhead.
+  Symbols written here must be read back with the equivalent _q15() decoder
+   functions; od_ec_decode() cannot decode them properly.
+  fl: The cumulative frequency of all symbols that come before the one to be
+       encoded.
+  fh: The cumulative frequency of all symbols up to and including the one to
+       be encoded.*/
+static void od_ec_encode_q15(od_ec_enc *enc, unsigned fl, unsigned fh) {
+  od_ec_window new_low;
+  unsigned range;
+  unsigned lo_off;
+  unsigned hi_off;
+  OD_ASSERT(fl < fh);
+  OD_ASSERT(fh <= 32768U);
+  range = enc->rng;
+  new_low = enc->low;
+  OD_ASSERT(32768U <= range);
+  /*Scale both cumulative frequencies by range / 32768.*/
+  lo_off = fl * (uint32_t)range >> 15;
+  hi_off = fh * (uint32_t)range >> 15;
+  new_low += lo_off;
+  od_ec_enc_normalize(enc, new_low, hi_off - lo_off);
+#if OD_MEASURE_EC_OVERHEAD
+  enc->nb_symbols++;
+  enc->entropy -= OD_LOG2((double)(fh - fl) / 32768.);
+#endif
+}
+
+/*Encodes a symbol given its frequency information with an arbitrary scale.
+  Works like od_ec_encode(), except that ft is not required to be at least
+   16384: all three frequencies are shifted up before being passed on.
+  fl: The cumulative frequency of all symbols that come before the one to be
+       encoded.
+  fh: The cumulative frequency of all symbols up to and including the one to
+       be encoded.
+  ft: The sum of the frequencies of all the symbols.
+      This must be at least 2 and no more than 32768.*/
+static void od_ec_encode_unscaled(od_ec_enc *enc, unsigned fl, unsigned fh,
+                                  unsigned ft) {
+  int shift;
+  OD_ASSERT(fl < fh);
+  OD_ASSERT(fh <= ft);
+  OD_ASSERT(2 <= ft);
+  OD_ASSERT(ft <= 32768U);
+  shift = 15 - OD_ILOG_NZ(ft - 1);
+  od_ec_encode(enc, fl << shift, fh << shift, ft << shift);
+}
+
+/*Encodes a bit that has an fz/ft probability of being a zero.
+  val: The value to encode (0 or 1).
+  fz: The probability that val is zero, scaled by ft; must satisfy 0 < fz < ft.
+  ft: The total probability.
+      This must be at least 16384, no more than 32768, and at most enc->rng.*/
+void od_ec_encode_bool(od_ec_enc *enc, int val, unsigned fz, unsigned ft) {
+  od_ec_window l;
+  unsigned r;
+  int s;
+  unsigned v;
+  OD_ASSERT(0 < fz);
+  OD_ASSERT(fz < ft);
+  OD_ASSERT(16384 <= ft);
+  OD_ASSERT(ft <= 32768U);
+  l = enc->low;
+  r = enc->rng;
+  OD_ASSERT(ft <= r);
+  s = r - ft >= ft; /*1 if ft can be doubled without exceeding r.*/
+  ft <<= s;
+  fz <<= s;
+  OD_ASSERT(r - ft < ft);
+#if OD_EC_REDUCED_OVERHEAD
+  {
+    unsigned d;
+    unsigned e;
+    d = r - ft;
+    e = OD_SUBSATU(2 * d, ft);
+    v = fz + OD_MINI(fz, e) + OD_MINI(OD_SUBSATU(fz, e) >> 1, d);
+  }
+#else
+  v = fz + OD_MINI(fz, r - ft);
+#endif
+  if (val) l += v; /*A one takes the part of the range above v.*/
+  r = val ? r - v : v; /*A zero keeps the v values below it.*/
+  od_ec_enc_normalize(enc, l, r);
+#if OD_MEASURE_EC_OVERHEAD
+  enc->entropy -= OD_LOG2((double)(val ? ft - fz : fz) / ft);
+  enc->nb_symbols++;
+#endif
+}
+
+/*Encodes one bit whose probability of being zero is fz / 32768 (Q15).
+  This is a simpler, lower-overhead od_ec_encode_bool() for ft == 32768; the
+   bit must be read back with an equivalent _q15() decoder function.
+  val: The value to encode (0 or 1).
+  fz: The probability that val is zero, scaled by 32768.*/
+void od_ec_encode_bool_q15(od_ec_enc *enc, int val, unsigned fz) {
+  od_ec_window low;
+  unsigned range;
+  unsigned split;
+  OD_ASSERT(0 < fz);
+  OD_ASSERT(fz < 32768U);
+  low = enc->low;
+  range = enc->rng;
+  OD_ASSERT(32768U <= range);
+  split = fz * (uint32_t)range >> 15;
+  if (val) {
+    low += split;
+    range -= split;
+  } else
+    range = split;
+  od_ec_enc_normalize(enc, low, range);
+#if OD_MEASURE_EC_OVERHEAD
+  enc->entropy -= OD_LOG2((double)(val ? 32768 - fz : fz) / 32768.);
+  enc->nb_symbols++;
+#endif
+}
+
+/*Encodes symbol s using a cumulative distribution function (CDF) table.
+  s: The index of the symbol to encode.
+  cdf: The CDF; symbol s covers [s > 0 ? cdf[s - 1] : 0, cdf[s]).
+       The values must be monotonically non-decreasing, and the last value
+        must be at least 16384, and no more than 32768.
+  nsyms: The number of symbols in the alphabet (should be at most 16).*/
+void od_ec_encode_cdf(od_ec_enc *enc, int s, const uint16_t *cdf, int nsyms) {
+  unsigned fl;
+  OD_ASSERT(s >= 0);
+  OD_ASSERT(s < nsyms);
+  fl = s > 0 ? cdf[s - 1] : 0;
+  od_ec_encode(enc, fl, cdf[s], cdf[nsyms - 1]);
+}
+
+/*Encodes symbol s using a Q15 cumulative distribution function (CDF) table.
+  This is a simpler, lower-overhead version of od_ec_encode_cdf() for tables
+   with cdf[nsyms - 1] == 32768.
+  Symbols written here must be read back with the equivalent _q15() decoder
+   functions; od_ec_decode() cannot decode them properly.
+  s: The index of the symbol to encode.
+  cdf: The CDF; symbol s covers [s > 0 ? cdf[s - 1] : 0, cdf[s]).
+       The values must be monotonically non-decreasing, and the last value
+        must be exactly 32768.
+  nsyms: The number of symbols in the alphabet (should be at most 16).*/
+void od_ec_encode_cdf_q15(od_ec_enc *enc, int s, const uint16_t *cdf,
+                          int nsyms) {
+  unsigned fl;
+  /*nsyms is only read by the assertions below.*/
+  (void)nsyms;
+  OD_ASSERT(s >= 0);
+  OD_ASSERT(s < nsyms);
+  OD_ASSERT(cdf[nsyms - 1] == 32768U);
+  fl = s > 0 ? cdf[s - 1] : 0;
+  od_ec_encode_q15(enc, fl, cdf[s]);
+}
+
+/*Encodes symbol s using a CDF table whose total need not be pre-scaled.
+  s: The index of the symbol to encode.
+  cdf: The CDF; symbol s covers [s > 0 ? cdf[s - 1] : 0, cdf[s]).
+       The values must be monotonically non-decreasing, and the last value
+        must be at least 2, and no more than 32768.
+  nsyms: The number of symbols in the alphabet (should be at most 16).*/
+void od_ec_encode_cdf_unscaled(od_ec_enc *enc, int s, const uint16_t *cdf,
+                               int nsyms) {
+  unsigned fl;
+  OD_ASSERT(s >= 0);
+  OD_ASSERT(s < nsyms);
+  fl = s > 0 ? cdf[s - 1] : 0;
+  od_ec_encode_unscaled(enc, fl, cdf[s], cdf[nsyms - 1]);
+}
+
+/*Encodes symbol s from a CDF whose total is the power of two 1 << ftb.
+  Equivalent to od_ec_encode_cdf_q15() with the cdf scaled by
+   (1 << (15 - ftb)).
+  s: The index of the symbol to encode.
+  cdf: The CDF; symbol s covers [s > 0 ? cdf[s - 1] : 0, cdf[s]).
+       The values must be monotonically non-decreasing, and the last value
+        must be exactly 1 << ftb.
+  nsyms: The number of symbols in the alphabet (should be at most 16).
+  ftb: The number of bits of precision in the cumulative distribution.
+       This must be no more than 15.*/
+void od_ec_encode_cdf_unscaled_dyadic(od_ec_enc *enc, int s,
+                                      const uint16_t *cdf, int nsyms,
+                                      unsigned ftb) {
+  unsigned up;
+  (void)nsyms;
+  OD_ASSERT(s >= 0);
+  OD_ASSERT(s < nsyms);
+  OD_ASSERT(ftb <= 15);
+  OD_ASSERT(cdf[nsyms - 1] == 1U << ftb);
+  up = 15 - ftb;
+  od_ec_encode_q15(enc, s > 0 ? cdf[s - 1] << up : 0, cdf[s] << up);
+}
+
+/*Encodes the raw unsigned integer fl, which must be less than ft.
+  ft: The number of integers that can be encoded (one more than the max).
+      This must be at least 2, and no more than 1 << (25 + OD_EC_UINT_BITS).*/
+void od_ec_enc_uint(od_ec_enc *enc, uint32_t fl, uint32_t ft) {
+  int top;
+  int nraw;
+  OD_ASSERT(ft >= 2);
+  OD_ASSERT(fl < ft);
+  OD_ASSERT(ft <= (uint32_t)1 << (25 + OD_EC_UINT_BITS));
+  if (ft <= 1U << OD_EC_UINT_BITS) {
+    od_ec_encode_cdf_q15(enc, (int)fl, OD_UNIFORM_CDF_Q15(ft), (int)ft);
+    return;
+  }
+  ft--;
+  nraw = OD_ILOG_NZ(ft) - OD_EC_UINT_BITS;
+  top = (int)(ft >> nraw) + 1;
+  od_ec_encode_cdf_q15(enc, (int)(fl >> nraw), OD_UNIFORM_CDF_Q15(top), top);
+  od_ec_enc_bits(enc, fl & (((uint32_t)1 << nraw) - 1), nraw);
+}
+
+/*Appends raw bits to the end window, spilling whole bytes to the tail of buf.
+  If growing buf fails, sets enc->error = -1 and enc->end_offs = 0 and returns.
+  fl: The bits to encode; must be less than 1 << ftb.
+  ftb: The number of bits to encode, between 0 and 25, inclusive.*/
+void od_ec_enc_bits(od_ec_enc *enc, uint32_t fl, unsigned ftb) {
+  od_ec_window end_window;
+  int nend_bits;
+  OD_ASSERT(ftb <= 25);
+  OD_ASSERT(fl < (uint32_t)1 << ftb);
+#if OD_MEASURE_EC_OVERHEAD
+  enc->entropy += ftb;
+#endif
+  end_window = enc->end_window;
+  nend_bits = enc->nend_bits;
+  if (nend_bits + ftb > OD_EC_WINDOW_SIZE) { /*No room: flush whole bytes.*/
+    unsigned char *buf;
+    uint32_t storage;
+    uint32_t end_offs;
+    buf = enc->buf;
+    storage = enc->storage;
+    end_offs = enc->end_offs;
+    if (end_offs + (OD_EC_WINDOW_SIZE >> 3) >= storage) {
+      unsigned char *new_buf;
+      uint32_t new_storage;
+      new_storage = 2 * storage + (OD_EC_WINDOW_SIZE >> 3);
+      new_buf = (unsigned char *)malloc(sizeof(*new_buf) * new_storage);
+      if (new_buf == NULL) {
+        enc->error = -1;
+        enc->end_offs = 0;
+        return;
+      }
+      OD_COPY(new_buf + new_storage - end_offs, buf + storage - end_offs,
+              end_offs); /*Raw bytes stay at the tail of the new buffer.*/
+      storage = new_storage;
+      free(buf);
+      enc->buf = buf = new_buf;
+      enc->storage = storage;
+    }
+    do { /*Raw bytes are written backwards from the end of buf.*/
+      OD_ASSERT(end_offs < storage);
+      buf[storage - ++end_offs] = (unsigned char)end_window;
+      end_window >>= 8;
+      nend_bits -= 8;
+    } while (nend_bits >= 8);
+    enc->end_offs = end_offs;
+  }
+  OD_ASSERT(nend_bits + ftb <= OD_EC_WINDOW_SIZE);
+  end_window |= (od_ec_window)fl << nend_bits;
+  nend_bits += ftb;
+  enc->end_window = end_window;
+  enc->nend_bits = nend_bits;
+}
+
+/*Overwrites a few bits at the very start of an existing stream, after they
+   have already been encoded.
+  This makes it possible to have a few flags up front, where it is easy for
+   decoders to access them without parsing the whole stream, even if their
+   values are not determined until late in the encoding process, without having
+   to buffer all the intermediate symbols in the encoder.
+  In order for this to work, at least nbits bits must have already been encoded
+   using probabilities that are an exact power of two.
+  The encoder checks that enough bits were encoded (setting enc->error to -1 if
+   not), but cannot check this latter condition.
+  val: The bits to encode (in the nbits least significant bits).
+       They will be decoded in order from most-significant to least.
+  nbits: The number of bits to overwrite.
+         This must be no more than 8.*/
+void od_ec_enc_patch_initial_bits(od_ec_enc *enc, unsigned val, int nbits) {
+  int shift;
+  unsigned mask;
+  OD_ASSERT(nbits >= 0);
+  OD_ASSERT(nbits <= 8);
+  OD_ASSERT(val < 1U << nbits);
+  shift = 8 - nbits;
+  mask = ((1U << nbits) - 1) << shift; /*The top nbits bits of a byte.*/
+  if (enc->offs > 0) {
+    /*The first byte is already in the pre-carry buffer: patch it there.*/
+    enc->precarry_buf[0] =
+        (uint16_t)((enc->precarry_buf[0] & ~mask) | val << shift);
+  } else if (9 + enc->cnt + (enc->rng == 0x8000) > nbits) {
+    /*The first byte has yet to be output: patch it inside low.*/
+    enc->low = (enc->low & ~((od_ec_window)mask << (16 + enc->cnt))) |
+               (od_ec_window)val << (16 + enc->cnt + shift);
+  }
+  /*The encoder hasn't even encoded nbits bits of data yet.*/
+  else
+    enc->error = -1;
+}
+
+#if OD_MEASURE_EC_OVERHEAD
+#include <stdio.h>
+#endif
+
+/*Indicates that there are no more symbols to encode.
+  All remaining output bytes are flushed to the output buffer.
+  od_ec_enc_reset() should be called before using the encoder again.
+  nbytes: Returns the size of the encoded data in the returned buffer.
+  Return: A pointer to the start of the final buffer, or NULL if there was an
+           encoding error (*nbytes is left unset in that case).*/
+unsigned char *od_ec_enc_done(od_ec_enc *enc, uint32_t *nbytes) {
+  unsigned char *out;
+  uint32_t storage;
+  uint16_t *buf;
+  uint32_t offs;
+  uint32_t end_offs;
+  int nend_bits;
+  od_ec_window m;
+  od_ec_window e;
+  od_ec_window l;
+  unsigned r;
+  int c;
+  int s;
+  if (enc->error) return NULL;
+#if OD_MEASURE_EC_OVERHEAD
+  {
+    uint32_t tell;
+    /* Don't count the 1 bit we lose to raw bits as overhead. */
+    tell = od_ec_enc_tell(enc) - 1;
+    fprintf(stderr, "overhead: %f%%\n",
+            100 * (tell - enc->entropy) / enc->entropy);
+    fprintf(stderr, "efficiency: %f bits/symbol\n",
+            (double)tell / enc->nb_symbols);
+  }
+#endif
+  /*We output the minimum number of bits that ensures that the symbols encoded
+     thus far will be decoded correctly regardless of the bits that follow.*/
+  l = enc->low;
+  r = enc->rng;
+  c = enc->cnt;
+  s = 9;
+  m = 0x7FFF;
+  e = (l + m) & ~m;
+  while ((e | m) >= l + r) {
+    s++;
+    m >>= 1;
+    e = (l + m) & ~m;
+  }
+  s += c;
+  offs = enc->offs;
+  buf = enc->precarry_buf;
+  if (s > 0) {
+    unsigned n;
+    storage = enc->precarry_storage;
+    if (offs + ((s + 7) >> 3) > storage) {
+      storage = storage * 2 + ((s + 7) >> 3);
+      buf = (uint16_t *)realloc(buf, sizeof(*buf) * storage);
+      if (buf == NULL) {
+        enc->error = -1;
+        return NULL;
+      }
+      enc->precarry_buf = buf;
+      enc->precarry_storage = storage;
+    }
+    n = (1 << (c + 16)) - 1;
+    do {
+      OD_ASSERT(offs < storage);
+      buf[offs++] = (uint16_t)(e >> (c + 16));
+      e &= n;
+      s -= 8;
+      c -= 8;
+      n >>= 8;
+    } while (s > 0);
+  }
+  /*Make sure there's enough room for the entropy-coded bits and the raw
+     bits.*/
+  out = enc->buf;
+  storage = enc->storage;
+  end_offs = enc->end_offs;
+  e = enc->end_window;
+  nend_bits = enc->nend_bits;
+  s = -s; /*Bits of room left in the last entropy-coded byte.*/
+  c = OD_MAXI((nend_bits - s + 7) >> 3, 0);
+  if (offs + end_offs + c > storage) {
+    storage = offs + end_offs + c;
+    out = (unsigned char *)realloc(out, sizeof(*out) * storage);
+    if (out == NULL) {
+      enc->error = -1;
+      return NULL;
+    }
+    OD_MOVE(out + storage - end_offs, out + enc->storage - end_offs, end_offs);
+    enc->buf = out;
+    enc->storage = storage;
+  }
+  /*If we have buffered raw bits, flush them as well.*/
+  while (nend_bits > s) {
+    OD_ASSERT(end_offs < storage);
+    out[storage - ++end_offs] = (unsigned char)e;
+    e >>= 8;
+    nend_bits -= 8;
+  }
+  *nbytes = offs + end_offs;
+  /*Perform carry propagation.*/
+  OD_ASSERT(offs + end_offs <= storage);
+  out = out + storage - (offs + end_offs); /*Start of the packet in buf.*/
+  c = 0; /*c is reused as the running carry.*/
+  end_offs = offs; /*Reused: length of the entropy-coded part.*/
+  while (offs-- > 0) {
+    c = buf[offs] + c;
+    out[offs] = (unsigned char)c;
+    c >>= 8;
+  }
+  /*Add any remaining raw bits to the last byte.
+    There is guaranteed to be enough room, because nend_bits <= s.*/
+  OD_ASSERT(nend_bits <= 0 || end_offs > 0);
+  if (nend_bits > 0) out[end_offs - 1] |= (unsigned char)e;
+  /*Note: Unless there's an allocation error, if you keep encoding into the
+     current buffer and call this function again later, everything will work
+     just fine (you won't get a new packet out, but you will get a single
+     buffer with the new data appended to the old).
+    However, this function is O(N) where N is the amount of data coded so far,
+     so calling it more than once for a given packet is a bad idea.*/
+  return out;
+}
+
+/*Reports how many bits the symbols encoded so far have "used".
+  The decoder can compute the same number, so it is suitable for making coding
+   decisions.
+  Warning: The value returned by this function can decrease compared to an
+   earlier call, even after encoding more data, if there is an encoding error
+   (i.e., a failure to allocate enough space for the output buffer).
+  Return: The number of bits.
+          This is always slightly larger than the exact value (i.e., all
+           rounding error is in the positive direction).*/
+int od_ec_enc_tell(od_ec_enc *enc) {
+  /*Of the 10 added here, 9 cancel the -9 offset baked into cnt and 1 is the
+     extra bit reserved for terminating the stream.*/
+  return 8 * (enc->end_offs + enc->offs) + 10 + enc->nend_bits + enc->cnt;
+}
+
+/*Fractional-precision counterpart of od_ec_enc_tell().
+  The decoder can compute the same number, so it is suitable for making coding
+   decisions.
+  Warning: The value returned by this function can decrease compared to an
+   earlier call, even after encoding more data, if there is an encoding error
+   (i.e., a failure to allocate enough space for the output buffer).
+  Return: The number of bits scaled by 2**OD_BITRES, always slightly larger
+           than the exact value (all rounding error is positive).*/
+uint32_t od_ec_enc_tell_frac(od_ec_enc *enc) {
+  int whole_bits = od_ec_enc_tell(enc);
+  return od_ec_tell_frac(whole_bits, enc->rng);
+}
+
+/*Saves an entropy coder checkpoint to dst.
+  This allows an encoder to reverse a series of entropy coder decisions if it
+   decides that the information would have been better coded some other way.
+  NOTE(review): presumably a shallow copy sharing src's buffers; check OD_COPY.*/
+void od_ec_enc_checkpoint(od_ec_enc *dst, const od_ec_enc *src) {
+  OD_COPY(dst, src, 1);
+}
+
+/*Restores an entropy coder checkpoint saved by od_ec_enc_checkpoint().
+  Only checkpoints from earlier in dst's own history can be restored: the
+   saved state must be a causal ancestor of the current one.
+  Restoring is also incompatible with patching the initial bits, as the
+   patched bits remain in the restored version.*/
+void od_ec_enc_rollback(od_ec_enc *dst, const od_ec_enc *src) {
+  uint16_t *live_precarry;
+  uint32_t live_precarry_storage;
+  unsigned char *live_buf;
+  uint32_t live_storage;
+  OD_ASSERT(dst->storage >= src->storage);
+  OD_ASSERT(dst->precarry_storage >= src->precarry_storage);
+  /*dst keeps its own buffers and their sizes; the rest comes from src.*/
+  live_precarry = dst->precarry_buf;
+  live_precarry_storage = dst->precarry_storage;
+  live_buf = dst->buf;
+  live_storage = dst->storage;
+  OD_COPY(dst, src, 1);
+  dst->precarry_buf = live_precarry;
+  dst->precarry_storage = live_precarry_storage;
+  dst->buf = live_buf;
+  dst->storage = live_storage;
+}
diff --git a/aom_dsp/entenc.h b/aom_dsp/entenc.h
new file mode 100644
index 000000000..5df209222
--- /dev/null
+++ b/aom_dsp/entenc.h
@@ -0,0 +1,115 @@
+/*Daala video codec
+Copyright (c) 2001-2013 Daala project contributors.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#if !defined(_entenc_H)
+#define _entenc_H (1)
+#include <stddef.h>
+#include "entcode.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct od_ec_enc od_ec_enc;
+
+#define OD_MEASURE_EC_OVERHEAD (0) /*Nonzero adds entropy/nb_symbols below.*/
+
+/*The entropy encoder context; od_ec_enc_checkpoint() copies it as a whole.*/
+struct od_ec_enc {
+  /*Buffered output.
+    This contains only the raw bits until the final call to od_ec_enc_done(),
+     where all the arithmetic-coded data gets prepended to it.*/
+  unsigned char *buf;
+  /*The size of the buffer.*/
+  uint32_t storage;
+  /*The offset at which the last byte containing raw bits was written.*/
+  uint32_t end_offs;
+  /*Bits that will be read from/written at the end.*/
+  od_ec_window end_window;
+  /*Number of valid bits in end_window.*/
+  int nend_bits;
+  /*A buffer for output bytes with their associated carry flags.*/
+  uint16_t *precarry_buf;
+  /*The size of the pre-carry buffer.*/
+  uint32_t precarry_storage;
+  /*The offset at which the next entropy-coded byte will be written.*/
+  uint32_t offs;
+  /*The low end of the current range.*/
+  od_ec_window low;
+  /*The number of values in the current range.*/
+  uint16_t rng;
+  /*The number of bits of data in the current value.*/
+  int16_t cnt;
+  /*Nonzero if an error occurred.*/
+  int error;
+#if OD_MEASURE_EC_OVERHEAD
+  double entropy; /*NOTE(review): presumably measured symbol entropy; confirm.*/
+  int nb_symbols; /*NOTE(review): presumably the coded symbol count; confirm.*/
+#endif
+};
+
+/*Encoder API. See entenc.c for further documentation.*/
+
+void od_ec_enc_init(od_ec_enc *enc, uint32_t size) OD_ARG_NONNULL(1);
+void od_ec_enc_reset(od_ec_enc *enc) OD_ARG_NONNULL(1);
+void od_ec_enc_clear(od_ec_enc *enc) OD_ARG_NONNULL(1);
+
+void od_ec_encode_bool(od_ec_enc *enc, int val, unsigned fz, unsigned ft)
+    OD_ARG_NONNULL(1);
+void od_ec_encode_bool_q15(od_ec_enc *enc, int val, unsigned fz_q15)
+    OD_ARG_NONNULL(1);
+void od_ec_encode_cdf(od_ec_enc *enc, int s, const uint16_t *cdf, int nsyms)
+    OD_ARG_NONNULL(1) OD_ARG_NONNULL(3);
+void od_ec_encode_cdf_q15(od_ec_enc *enc, int s, const uint16_t *cdf, int nsyms)
+    OD_ARG_NONNULL(1) OD_ARG_NONNULL(3);
+void od_ec_encode_cdf_unscaled(od_ec_enc *enc, int s, const uint16_t *cdf,
+                               int nsyms) OD_ARG_NONNULL(1) OD_ARG_NONNULL(3);
+void od_ec_encode_cdf_unscaled_dyadic(od_ec_enc *enc, int s,
+                                      const uint16_t *cdf, int nsyms,
+                                      unsigned ftb) OD_ARG_NONNULL(1)
+    OD_ARG_NONNULL(3);
+
+void od_ec_enc_uint(od_ec_enc *enc, uint32_t fl, uint32_t ft) OD_ARG_NONNULL(1);
+
+void od_ec_enc_bits(od_ec_enc *enc, uint32_t fl, unsigned ftb)
+    OD_ARG_NONNULL(1);
+
+void od_ec_enc_patch_initial_bits(od_ec_enc *enc, unsigned val, int nbits)
+    OD_ARG_NONNULL(1);
+OD_WARN_UNUSED_RESULT unsigned char *od_ec_enc_done(od_ec_enc *enc,
+                                                    uint32_t *nbytes)
+    OD_ARG_NONNULL(1) OD_ARG_NONNULL(2);
+
+OD_WARN_UNUSED_RESULT int od_ec_enc_tell(od_ec_enc *enc) OD_ARG_NONNULL(1);
+OD_WARN_UNUSED_RESULT uint32_t od_ec_enc_tell_frac(od_ec_enc *enc)
+    OD_ARG_NONNULL(1);
+
+void od_ec_enc_checkpoint(od_ec_enc *dst, const od_ec_enc *src);
+void od_ec_enc_rollback(od_ec_enc *dst, const od_ec_enc *src);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif
diff --git a/av1/common/odintrin.h b/av1/common/odintrin.h
index 5324cff8c..0036535e7 100644
--- a/av1/common/odintrin.h
+++ b/av1/common/odintrin.h
@@ -48,6 +48,7 @@ extern uint32_t OD_DIVU_SMALL_CONSTS[OD_DIVU_DMAX][2];
   (((_d) < OD_DIVU_DMAX) ? (OD_DIVU_SMALL((_x), (_d))) : ((_x) / (_d)))
 
 #define OD_MINI AOMMIN
+#define OD_MAXI AOMMAX /*Daala-side name for AOMMAX, like OD_MINI above.*/
 #define OD_CLAMPI(min, val, max) clamp((val), (min), (max))
 
 #define OD_CLZ0 (1)
@@ -59,6 +60,73 @@ extern uint32_t OD_DIVU_SMALL_CONSTS[OD_DIVU_DMAX][2];
   We define a special version of the macro to use when x can be zero.*/
 #define OD_ILOG(x) ((x) ? OD_ILOG_NZ(x) : 0)
 
+#define OD_LOG2 AOMLOG2 /*Daala-side name for AOMLOG2.*/
+
+/*Enable special features for gcc and compatible compilers.*/
+#if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
+#define OD_GNUC_PREREQ(maj, min, pat)                                \
+  ((__GNUC__ << 16) + (__GNUC_MINOR__ << 8) + __GNUC_PATCHLEVEL__ >= \
+   ((maj) << 16) + ((min) << 8) + (pat))
+#else /*Version macros missing: every version test evaluates to 0.*/
+#define OD_GNUC_PREREQ(maj, min, pat) (0)
+#endif
+
+#if OD_GNUC_PREREQ(3, 4, 0) /*Warn when a caller ignores the return value.*/
+#define OD_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__))
+#else /*Older/other compilers: the annotation expands to nothing.*/
+#define OD_WARN_UNUSED_RESULT
+#endif
+
+#if OD_GNUC_PREREQ(3, 4, 0) /*Argument number x (1-based) must not be NULL.*/
+#define OD_ARG_NONNULL(x) __attribute__((__nonnull__(x)))
+#else /*Older/other compilers: the annotation expands to nothing.*/
+#define OD_ARG_NONNULL(x)
+#endif
+
+#if defined(OD_ENABLE_ASSERTIONS)
+#if OD_GNUC_PREREQ(2, 5, 0)
+__attribute__((noreturn))
+#endif
+void od_fatal_impl(const char *_str, const char *_file, int _line);
+
+#define OD_FATAL(_str) (od_fatal_impl(_str, __FILE__, __LINE__))
+/*Calls OD_FATAL with the stringized condition when _cond is false.*/
+#define OD_ASSERT(_cond)                     \
+  do {                                       \
+    if (!(_cond)) {                          \
+      OD_FATAL("assertion failed: " #_cond); \
+    }                                        \
+  } while (0)
+/*As OD_ASSERT, appending _message (a string literal) to the report.*/
+#define OD_ASSERT2(_cond, _message)                        \
+  do {                                                     \
+    if (!(_cond)) {                                        \
+      OD_FATAL("assertion failed: " #_cond "\n" _message); \
+    }                                                      \
+  } while (0)
+/*Like OD_ASSERT, but _cond is still evaluated when assertions are off.*/
+#define OD_ALWAYS_TRUE(_cond) OD_ASSERT(_cond)
+
+#else /*Assertions off: no-op expressions, still usable as statements.*/
+#define OD_ASSERT(_cond) ((void)0)
+#define OD_ASSERT2(_cond, _message) ((void)0)
+#define OD_ALWAYS_TRUE(_cond) ((void)(_cond)) /*_cond is still evaluated*/
+#endif
+
+/** Copy n non-overlapping elements from src to dst; the 0* term type-checks
+    the pointers at compile time. dst and src are each evaluated twice. */
+#if !defined(OVERRIDE_OD_COPY)
+#define OD_COPY(dst, src, n) \
+  (memcpy((dst), (src), sizeof(*(dst)) * (n) + 0 * ((dst) - (src))))
+#endif
+
+/** Copy n elements from src to dst; the regions may overlap (memmove). The 0*
+    term type-checks the pointers; dst and src are each evaluated twice. */
+#if !defined(OVERRIDE_OD_MOVE)
+#define OD_MOVE(dst, src, n) \
+  (memmove((dst), (src), sizeof(*(dst)) * (n) + 0 * ((dst) - (src))))
+#endif
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/configure b/configure
index e84ccca0e..6ea406332 100755
--- a/configure
+++ b/configure
@@ -279,6 +279,7 @@ EXPERIMENT_LIST="
     bitstream_debug
     alt_intra
     palette
+    daala_ec
 "
 CONFIG_LIST="
     dependency_tracking