From: Alex Converse Date: Wed, 16 Dec 2015 19:16:32 +0000 (-0800) Subject: Add an implementation of Asymetric Numeral Systems (ANS). X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9ffcb469fb9748c7a5f9ececd82ee5a3c5c1c990;p=libvpx Add an implementation of Asymetric Numeral Systems (ANS). Change-Id: Ie41bc72127e700887566dcc951da9d83a0b94891 --- diff --git a/configure b/configure index 29b1da8c6..f94546e7e 100755 --- a/configure +++ b/configure @@ -281,6 +281,7 @@ EXPERIMENT_LIST=" ext_interp ext_refs supertx + ans " CONFIG_LIST=" dependency_tracking diff --git a/test/test.mk b/test/test.mk index 471f87021..81f20fb76 100644 --- a/test/test.mk +++ b/test/test.mk @@ -165,6 +165,7 @@ ifeq ($(CONFIG_VP10),yes) LIBVPX_TEST_SRCS-yes += vp10_inv_txfm_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_dct_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_ANS) += vp10_ans_test.cc endif # VP10 diff --git a/test/vp10_ans_test.cc b/test/vp10_ans_test.cc new file mode 100644 index 000000000..441583ad1 --- /dev/null +++ b/test/vp10_ans_test.cc @@ -0,0 +1,337 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "third_party/googletest/src/include/gtest/gtest.h" + +#include "test/acm_random.h" +#include "vp10/common/ans.h" +#include "vp10/encoder/treewriter.h" +#include "vpx_dsp/bitreader.h" +#include "vpx_dsp/bitwriter.h" + +namespace { +typedef std::vector > PvVec; + +PvVec abs_encode_build_vals(int iters) { + PvVec ret; + libvpx_test::ACMRandom gen(0x30317076); + double entropy = 0; + for (int i = 0; i < iters; ++i) { + uint8_t p; + do { + p = gen.Rand8(); + } while (p == 0); // zero is not a valid coding probability + bool b = gen.Rand8() < p; + ret.push_back(std::make_pair(static_cast(p), b)); + double d = p / 256.; + entropy += -d * log2(d) - (1 - d) * log2(1 - d); + } + printf("entropy %f\n", entropy); + return ret; +} + +bool check_rabs(const PvVec &pv_vec, uint8_t *buf) { + AnsCoder a; + ans_write_init(&a, buf); + + std::clock_t start = std::clock(); + for (PvVec::const_reverse_iterator it = pv_vec.rbegin(); it != pv_vec.rend(); + ++it) { + rabs_write(&a, it->second, 256 - it->first); + } + std::clock_t enc_time = std::clock() - start; + int offset = ans_write_end(&a); + bool okay = true; + AnsDecoder d; + if (ans_read_init(&d, buf, offset)) return false; + start = std::clock(); + for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) { + okay &= rabs_read(&d, 256 - it->first) == it->second; + } + std::clock_t dec_time = std::clock() - start; + if (!okay) return false; + printf("rABS size %d enc_time %f dec_time %f\n", offset, + static_cast(enc_time) / CLOCKS_PER_SEC, + static_cast(dec_time) / CLOCKS_PER_SEC); + return ans_read_end(&d); +} + +bool check_rabs_asc(const PvVec &pv_vec, uint8_t *buf) { + AnsCoder a; + ans_write_init(&a, buf); + + std::clock_t start = std::clock(); + for (PvVec::const_reverse_iterator it = pv_vec.rbegin(); it != pv_vec.rend(); + ++it) { + rabs_asc_write(&a, it->second, 256 - it->first); + } + std::clock_t enc_time = std::clock() - start; + 
int offset = ans_write_end(&a); + bool okay = true; + AnsDecoder d; + if (ans_read_init(&d, buf, offset)) return false; + start = std::clock(); + for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) { + okay &= rabs_asc_read(&d, 256 - it->first) == it->second; + } + std::clock_t dec_time = std::clock() - start; + if (!okay) return false; + printf("rABS (asc) size %d enc_time %f dec_time %f\n", offset, + static_cast(enc_time) / CLOCKS_PER_SEC, + static_cast(dec_time) / CLOCKS_PER_SEC); + return ans_read_end(&d); +} + +bool check_uabs(const PvVec &pv_vec, uint8_t *buf) { + AnsCoder a; + ans_write_init(&a, buf); + + std::clock_t start = std::clock(); + for (PvVec::const_reverse_iterator it = pv_vec.rbegin(); it != pv_vec.rend(); + ++it) { + uabs_write(&a, it->second, 256 - it->first); + } + std::clock_t enc_time = std::clock() - start; + int offset = ans_write_end(&a); + bool okay = true; + AnsDecoder d; + if (ans_read_init(&d, buf, offset)) return false; + start = std::clock(); + for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) { + okay &= uabs_read(&d, 256 - it->first) == it->second; + } + std::clock_t dec_time = std::clock() - start; + if (!okay) return false; + printf("uABS size %d enc_time %f dec_time %f\n", offset, + static_cast(enc_time) / CLOCKS_PER_SEC, + static_cast(dec_time) / CLOCKS_PER_SEC); + return ans_read_end(&d); +} + +bool check_vpxbool(const PvVec &pv_vec, uint8_t *buf) { + vpx_writer w; + vpx_reader r; + vpx_start_encode(&w, buf); + + std::clock_t start = std::clock(); + for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) { + vpx_write(&w, it->second, 256 - it->first); + } + std::clock_t enc_time = std::clock() - start; + vpx_stop_encode(&w); + bool okay = true; + vpx_reader_init(&r, buf, w.pos, NULL, NULL); + start = std::clock(); + for (PvVec::const_iterator it = pv_vec.begin(); it != pv_vec.end(); ++it) { + okay &= vpx_read(&r, 256 - it->first) == it->second; + } + std::clock_t 
dec_time = std::clock() - start; + printf("VPX size %d enc_time %f dec_time %f\n", w.pos, + static_cast(enc_time) / CLOCKS_PER_SEC, + static_cast(dec_time) / CLOCKS_PER_SEC); + return okay; +} + +const rans_sym rans_sym_tab[] = { + {70, 186}, {70, 116}, {100, 16}, {16, 0}, +}; +const int kDistinctSyms = sizeof(rans_sym_tab) / sizeof(rans_sym_tab[0]); + +std::vector ans_encode_build_vals(const rans_sym *tab, int iters) { + std::vector p_to_sym; + int i = 0; + while (p_to_sym.size() < 256) { + p_to_sym.insert(p_to_sym.end(), tab[i].prob, i); + ++i; + } + assert(p_to_sym.size() == 256); + std::vector ret; + libvpx_test::ACMRandom gen(18543637); + for (int i = 0; i < iters; ++i) { + int sym = p_to_sym[gen.Rand8()]; + ret.push_back(sym); + } + return ret; +} + +void rans_build_dec_tab(const struct rans_sym sym_tab[], + rans_dec_lut dec_tab) { + int val = 0; + int i; + for (i = ans_p8_precision - 1; i >= 0; --i) { + dec_tab[i].val = val; + dec_tab[i].prob = sym_tab[val].prob; + dec_tab[i].cum_prob = sym_tab[val].cum_prob; + if (i == sym_tab[val].cum_prob) ++val; + } +} + +bool check_rans(const std::vector &sym_vec, const rans_sym *const tab, + uint8_t *buf) { + AnsCoder a; + ans_write_init(&a, buf); + rans_dec_lut dec_tab; + rans_build_dec_tab(tab, dec_tab); + + std::clock_t start = std::clock(); + for (std::vector::const_reverse_iterator it = sym_vec.rbegin(); + it != sym_vec.rend(); ++it) { + rans_write(&a, &tab[*it]); + } + std::clock_t enc_time = std::clock() - start; + int offset = ans_write_end(&a); + bool okay = true; + AnsDecoder d; + if (ans_read_init(&d, buf, offset)) return false; + start = std::clock(); + for (std::vector::const_iterator it = sym_vec.begin(); + it != sym_vec.end(); ++it) { + okay &= rans_read(&d, dec_tab) == *it; + } + std::clock_t dec_time = std::clock() - start; + if (!okay) return false; + printf("rANS size %d enc_time %f dec_time %f\n", offset, + static_cast(enc_time) / CLOCKS_PER_SEC, + static_cast(dec_time) / CLOCKS_PER_SEC); + return 
ans_read_end(&d); +} + +void build_tree(vpx_tree_index *tree, int num_syms) { + vpx_tree_index i; + int sym = 0; + for (i = 0; i < num_syms - 1; ++i) { + tree[2 * i] = sym--; + tree[2 * i + 1] = 2 * (i + 1); + } + tree[2 * i - 1] = sym; +} + +// treep are the probabilities of tree nodes like: +// * +// / \ +// -sym0 * +// / \ +// -sym1 * +// / \ +// -sym2 -sym3 +void tab2tree(const rans_sym *tab, int tab_size, vpx_prob *treep) { + const unsigned basep = 256; + unsigned pleft = basep; + for (int i = 0; i < tab_size - 1; ++i) { + unsigned prob = (tab[i].prob * basep + (basep / 2)) / pleft; + assert(prob > 0 && prob < 256); + treep[i] = prob; + pleft -= tab[i].prob; + } +} + +struct sym_bools { + unsigned bits; + int len; +}; + +static void make_tree_bits_tab(sym_bools *tab, int num_syms) { + unsigned bits = 0; + int len = 0; + int i; + for (i = 0; i < num_syms - 1; ++i) { + bits *= 2; + ++len; + tab[i].bits = bits; + tab[i].len = len; + ++bits; + } + tab[i].bits = bits; + tab[i].len = len; +} + +void build_tpb(vpx_prob probs[/*num_syms*/], + vpx_tree_index tree[/*2*num_syms*/], + sym_bools bit_len[/*num_syms*/], + const rans_sym sym_tab[/*num_syms*/], int num_syms) { + tab2tree(sym_tab, num_syms, probs); + build_tree(tree, num_syms); + make_tree_bits_tab(bit_len, num_syms); +} + +bool check_vpxtree(const std::vector &sym_vec, const rans_sym *sym_tab, + uint8_t *buf) { + vpx_writer w; + vpx_reader r; + vpx_start_encode(&w, buf); + + vpx_prob probs[kDistinctSyms]; + vpx_tree_index tree[2 * kDistinctSyms]; + sym_bools bit_len[kDistinctSyms]; + build_tpb(probs, tree, bit_len, sym_tab, kDistinctSyms); + + std::clock_t start = std::clock(); + for (std::vector::const_iterator it = sym_vec.begin(); + it != sym_vec.end(); ++it) { + vp10_write_tree(&w, tree, probs, bit_len[*it].bits, bit_len[*it].len, 0); + } + std::clock_t enc_time = std::clock() - start; + vpx_stop_encode(&w); + vpx_reader_init(&r, buf, w.pos, NULL, NULL); + start = std::clock(); + for
(std::vector::const_iterator it = sym_vec.begin(); + it != sym_vec.end(); ++it) { + if (vpx_read_tree(&r, tree, probs) != *it) return false; + } + std::clock_t dec_time = std::clock() - start; + printf("VPXtree size %u enc_time %f dec_time %f\n", w.pos, + static_cast(enc_time) / CLOCKS_PER_SEC, + static_cast(dec_time) / CLOCKS_PER_SEC); + return true; +} + +class Vp10AbsTest : public ::testing::Test { + protected: + static void SetUpTestCase() { pv_vec_ = abs_encode_build_vals(kNumBools); } + virtual void SetUp() { buf_ = new uint8_t[kNumBools / 8]; } + virtual void TearDown() { delete[] buf_; } + static const int kNumBools = 100000000; + static PvVec pv_vec_; + uint8_t *buf_; +}; +PvVec Vp10AbsTest::pv_vec_; + +class Vp10AnsTest : public ::testing::Test { + protected: + static void SetUpTestCase() { + sym_vec_ = ans_encode_build_vals(rans_sym_tab, kNumSyms); + } + virtual void SetUp() { buf_ = new uint8_t[kNumSyms / 2]; } + virtual void TearDown() { delete[] buf_; } + static const int kNumSyms = 25000000; + static std::vector sym_vec_; + uint8_t *buf_; +}; +std::vector Vp10AnsTest::sym_vec_; + +TEST_F(Vp10AbsTest, Vpxbool) { EXPECT_TRUE(check_vpxbool(pv_vec_, buf_)); } +TEST_F(Vp10AbsTest, Rabs) { EXPECT_TRUE(check_rabs(pv_vec_, buf_)); } +TEST_F(Vp10AbsTest, RabsAsc) { EXPECT_TRUE(check_rabs_asc(pv_vec_, buf_)); } +TEST_F(Vp10AbsTest, Uabs) { EXPECT_TRUE(check_uabs(pv_vec_, buf_)); } + +TEST_F(Vp10AnsTest, Rans) { + EXPECT_TRUE(check_rans(sym_vec_, rans_sym_tab, buf_)); +} +TEST_F(Vp10AnsTest, Vpxtree) { + EXPECT_TRUE(check_vpxtree(sym_vec_, rans_sym_tab, buf_)); +} +} // namespace diff --git a/vp10/common/ans.h b/vp10/common/ans.h new file mode 100644 index 000000000..fbc9a45ca --- /dev/null +++ b/vp10/common/ans.h @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP10_COMMON_ANS_H_ +#define VP10_COMMON_ANS_H_ +// An implementation of Asymmetric Numeral Systems +// http://arxiv.org/abs/1311.2540v2 + +#include "./vpx_config.h" +#include "vpx/vpx_integer.h" +#include "vpx_ports/mem_ops.h" + +#define ANS_DIVIDE_BY_MULTIPLY 1 +#if ANS_DIVIDE_BY_MULTIPLY +#include "vp10/common/divide.h" +#define ANS_DIVREM(quotient, remainder, dividend, divisor) \ + do { \ + quotient = fastdiv(dividend, divisor); \ + remainder = dividend - quotient * divisor; \ + } while (0) +#define ANS_DIV(dividend, divisor) \ + fastdiv(dividend, divisor) +#else +#define ANS_DIVREM(quotient, remainder, dividend, divisor) \ + do { \ + quotient = dividend / divisor; \ + remainder = dividend % divisor; \ + } while (0) +#define ANS_DIV(dividend, divisor) \ + ((dividend) / (divisor)) +#endif + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +struct AnsCoder { + uint8_t *buf; + int buf_offset; + uint32_t state; +}; + +struct AnsDecoder { + const uint8_t *buf; + int buf_offset; + uint32_t state; +}; + +typedef uint8_t AnsP8; +#define ans_p8_precision 256u +#define ans_p8_shift 8 +#define l_base (ans_p8_precision * 4) // l_base % precision must be 0 +#define io_base 256 +// Range I = { l_base, l_base + 1, ..., l_base * io_base - 1 } + +static INLINE void ans_write_init(struct AnsCoder *const ans, + uint8_t *const buf) { + ans->buf = buf; + ans->buf_offset = 0; + ans->state = l_base; +} + +static INLINE int ans_write_end(struct AnsCoder *const ans) { + mem_put_le24(ans->buf + ans->buf_offset, ans->state); + return ans->buf_offset + 3; +} + +// rABS with descending spread +// p or p0 takes the place of l_s from the paper +// ans_p8_precision is m +static INLINE void rabs_desc_write(struct AnsCoder *ans, int val, AnsP8 p0) { + const AnsP8 p = ans_p8_precision 
- p0; + const unsigned l_s = val ? p : p0; + unsigned quot, rem; + if (ans->state >= l_base / ans_p8_precision * io_base * l_s) { + ans->buf[ans->buf_offset++] = ans->state % io_base; + ans->state /= io_base; + } + ANS_DIVREM(quot, rem, ans->state, l_s); + ans->state = quot * ans_p8_precision + rem + (val ? 0 : p); +} + +#define ANS_IMPL1 0 +#define UNPREDICTABLE(x) x +static INLINE int rabs_desc_read(struct AnsDecoder *ans, AnsP8 p0) { + int val; +#if ANS_IMPL1 + unsigned l_s; +#else + unsigned quot, rem, x, xn; +#endif + const AnsP8 p = ans_p8_precision - p0; + if (ans->state < l_base) { + ans->state = ans->state * io_base + ans->buf[--ans->buf_offset]; + } +#if ANS_IMPL1 + val = ans->state % ans_p8_precision < p; + l_s = val ? p : p0; + ans->state = (ans->state / ans_p8_precision) * l_s + + ans->state % ans_p8_precision - (!val * p); +#else + x = ans->state; + quot = x / ans_p8_precision; + rem = x % ans_p8_precision; + xn = quot * p; + val = rem < p; + if (UNPREDICTABLE(val)) { + ans->state = xn + rem; + } else { + // ans->state = quot * p0 + rem - p; + ans->state = x - xn - p; + } +#endif + return val; +} + +// rABS with ascending spread +// p or p0 takes the place of l_s from the paper +// ans_p8_precision is m +static INLINE void rabs_asc_write(struct AnsCoder *ans, int val, AnsP8 p0) { + const AnsP8 p = ans_p8_precision - p0; + const unsigned l_s = val ? p : p0; + unsigned quot, rem; + if (ans->state >= l_base / ans_p8_precision * io_base * l_s) { + ans->buf[ans->buf_offset++] = ans->state % io_base; + ans->state /= io_base; + } + ANS_DIVREM(quot, rem, ans->state, l_s); + ans->state = quot * ans_p8_precision + rem + (val ? 
p0 : 0); +} + +static INLINE int rabs_asc_read(struct AnsDecoder *ans, AnsP8 p0) { + int val; +#if ANS_IMPL1 + unsigned l_s; +#else + unsigned quot, rem, x, xn; +#endif + const AnsP8 p = ans_p8_precision - p0; + if (ans->state < l_base) { + ans->state = ans->state * io_base + ans->buf[--ans->buf_offset]; + } +#if ANS_IMPL1 + val = ans->state % ans_p8_precision < p; + l_s = val ? p : p0; + ans->state = (ans->state / ans_p8_precision) * l_s + + ans->state % ans_p8_precision - (!val * p); +#else + x = ans->state; + quot = x / ans_p8_precision; + rem = x % ans_p8_precision; + xn = quot * p; + val = rem >= p0; + if (UNPREDICTABLE(val)) { + ans->state = xn + rem - p0; + } else { + // ans->state = quot * p0 + rem - p0; + ans->state = x - xn; + } +#endif + return val; +} + +#define rabs_read rabs_desc_read +#define rabs_write rabs_desc_write + +// uABS with normalization +static INLINE void uabs_write(struct AnsCoder *ans, int val, AnsP8 p0) { + AnsP8 p = ans_p8_precision - p0; + const unsigned l_s = val ? 
p : p0; + if (ans->state >= l_base / ans_p8_precision * io_base * l_s) { + ans->buf[ans->buf_offset++] = ans->state % io_base; + ans->state /= io_base; + } + if (!val) + ans->state = ANS_DIV(ans->state * ans_p8_precision, p0); + else + ans->state = ANS_DIV((ans->state + 1) * ans_p8_precision + p - 1, p) - 1; +} + +static INLINE int uabs_read(struct AnsDecoder *ans, AnsP8 p0) { + AnsP8 p = ans_p8_precision - p0; + int s; + // unsigned int xp1; + unsigned xp, sp; + unsigned state = ans->state; + if (state < l_base && ans->buf_offset > 0) { + state = state * io_base + ans->buf[--ans->buf_offset]; + } + sp = state * p; + // xp1 = (sp + p) / ans_p8_precision; + xp = sp / ans_p8_precision; + // s = xp1 - xp; + s = (sp & 0xFF) >= p0; + if (UNPREDICTABLE(s)) + ans->state = xp; + else + ans->state = state - xp; + return s; +} + +static INLINE int uabs_read_bit(struct AnsDecoder *ans) { + int s; + unsigned state = ans->state; + if (state < l_base && ans->buf_offset > 0) { + state = state * io_base + ans->buf[--ans->buf_offset]; + } + s = (int)(state & 1); + ans->state = state >> 1; + return s; +} + +struct rans_sym { + AnsP8 prob; + AnsP8 cum_prob; // not-inclusive +}; + +struct rans_dec_sym { + uint8_t val; + AnsP8 prob; + AnsP8 cum_prob; // not-inclusive +}; + +typedef struct rans_dec_sym rans_dec_lut[ans_p8_precision]; + +static INLINE void rans_build_dec_tab(const AnsP8 token_probs[], + rans_dec_lut dec_tab) { + int val = 0; + int cum_prob = 0; + int sym_end = token_probs[0]; + int i; + for (i = 0; i < 256; ++i) { + if (i == sym_end) { + ++val; + cum_prob = sym_end; + sym_end += token_probs[val]; + } + dec_tab[i].val = val; + dec_tab[i].prob = token_probs[val]; + dec_tab[i].cum_prob = cum_prob; + } +} + +// rANS with normalization +// sym->prob takes the place of l_s from the paper +// ans_p8_precision is m +static INLINE void rans_write(struct AnsCoder *ans, + const struct rans_sym *const sym) { + const AnsP8 p = sym->prob; + if (ans->state >= l_base / ans_p8_precision 
* io_base * p) { + ans->buf[ans->buf_offset++] = ans->state % io_base; + ans->state /= io_base; + } + ans->state = + (ans->state / p) * ans_p8_precision + ans->state % p + sym->cum_prob; +} + +static INLINE int rans_read(struct AnsDecoder *ans, + const rans_dec_lut tab) { + unsigned rem; + unsigned quo; + int val; + if (ans->state < l_base && ans->buf_offset > 0) { + ans->state = ans->state * io_base + ans->buf[--ans->buf_offset]; + } + quo = ans->state / ans_p8_precision; + rem = ans->state % ans_p8_precision; + val = tab[rem].val; + + ans->state = quo * tab[rem].prob + rem - tab[rem].cum_prob; + return val; +} + +static INLINE int ans_read_init(struct AnsDecoder *const ans, + const uint8_t *const buf, + int offset) { + if (offset < 3) + return 1; + ans->buf = buf; + ans->buf_offset = offset - 3; + ans->state = mem_get_le24(buf + offset - 3); + return 0; +} + +static INLINE int ans_read_end(struct AnsDecoder *const ans) { + return ans->state == l_base; +} +#undef ANS_DIVREM +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus +#endif // VP10_COMMON_ANS_H_ diff --git a/vp10/common/divide.c b/vp10/common/divide.c new file mode 100644 index 000000000..00b43a0f0 --- /dev/null +++ b/vp10/common/divide.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "vp10/common/divide.h" + +/* Constants for divide by multiply for small divisors generated with: +void init_fastdiv() { + int i; + for (i = 3; i < 256; ++i) { + const int s = 31 ^ __builtin_clz(2 * i + 1); + const unsigned long long base = (1ull << (sizeof(unsigned) * 8 + s)) - 1; + fastdiv_tab[i].mult = (base / i + 1) & 0xFFFFFFFF; + fastdiv_tab[i].shift = s; + } + for (i = 0; i < 8; ++i) { + fastdiv_tab[1 << i].mult = 0; + fastdiv_tab[1 << i].shift = i; + } +} +*/ +const struct fastdiv_elem vp10_fastdiv_tab[256] = { + {0, 0}, {0, 0}, {0, 1}, {1431655766, 2}, + {0, 2}, {2576980378, 3}, {1431655766, 3}, {613566757, 3}, + {0, 3}, {3340530120, 4}, {2576980378, 4}, {1952257862, 4}, + {1431655766, 4}, {991146300, 4}, {613566757, 4}, {286331154, 4}, + {0, 4}, {3789677026, 5}, {3340530120, 5}, {2938661835, 5}, + {2576980378, 5}, {2249744775, 5}, {1952257862, 5}, {1680639377, 5}, + {1431655766, 5}, {1202590843, 5}, {991146300, 5}, {795364315, 5}, + {613566757, 5}, {444306962, 5}, {286331154, 5}, {138547333, 5}, + {0, 5}, {4034666248, 6}, {3789677026, 6}, {3558687189, 6}, + {3340530120, 6}, {3134165325, 6}, {2938661835, 6}, {2753184165, 6}, + {2576980378, 6}, {2409371898, 6}, {2249744775, 6}, {2097542168, 6}, + {1952257862, 6}, {1813430637, 6}, {1680639377, 6}, {1553498810, 6}, + {1431655766, 6}, {1314785907, 6}, {1202590843, 6}, {1094795586, 6}, + {991146300, 6}, {891408307, 6}, {795364315, 6}, {702812831, 6}, + {613566757, 6}, {527452125, 6}, {444306962, 6}, {363980280, 6}, + {286331154, 6}, {211227900, 6}, {138547333, 6}, {68174085, 6}, + {0, 6}, {4162814457, 7}, {4034666248, 7}, {3910343360, 7}, + {3789677026, 7}, {3672508268, 7}, {3558687189, 7}, {3448072337, 7}, + {3340530120, 7}, {3235934265, 7}, {3134165325, 7}, {3035110223, 7}, + {2938661835, 7}, {2844718599, 7}, {2753184165, 7}, {2663967058, 7}, + {2576980378, 7}, {2492141518, 7}, {2409371898, 7}, {2328596727, 7}, + {2249744775, 7}, {2172748162, 7}, {2097542168, 7}, {2024065048, 7}, + 
{1952257862, 7}, {1882064321, 7}, {1813430637, 7}, {1746305385, 7}, + {1680639377, 7}, {1616385542, 7}, {1553498810, 7}, {1491936009, 7}, + {1431655766, 7}, {1372618415, 7}, {1314785907, 7}, {1258121734, 7}, + {1202590843, 7}, {1148159575, 7}, {1094795586, 7}, {1042467791, 7}, + {991146300, 7}, {940802361, 7}, {891408307, 7}, {842937507, 7}, + {795364315, 7}, {748664025, 7}, {702812831, 7}, {657787785, 7}, + {613566757, 7}, {570128403, 7}, {527452125, 7}, {485518043, 7}, + {444306962, 7}, {403800345, 7}, {363980280, 7}, {324829460, 7}, + {286331154, 7}, {248469183, 7}, {211227900, 7}, {174592167, 7}, + {138547333, 7}, {103079216, 7}, {68174085, 7}, {33818641, 7}, + {0, 7}, {4228378656, 8}, {4162814457, 8}, {4098251237, 8}, + {4034666248, 8}, {3972037425, 8}, {3910343360, 8}, {3849563281, 8}, + {3789677026, 8}, {3730665024, 8}, {3672508268, 8}, {3615188300, 8}, + {3558687189, 8}, {3502987511, 8}, {3448072337, 8}, {3393925206, 8}, + {3340530120, 8}, {3287871517, 8}, {3235934265, 8}, {3184703642, 8}, + {3134165325, 8}, {3084305374, 8}, {3035110223, 8}, {2986566663, 8}, + {2938661835, 8}, {2891383213, 8}, {2844718599, 8}, {2798656110, 8}, + {2753184165, 8}, {2708291480, 8}, {2663967058, 8}, {2620200175, 8}, + {2576980378, 8}, {2534297473, 8}, {2492141518, 8}, {2450502814, 8}, + {2409371898, 8}, {2368739540, 8}, {2328596727, 8}, {2288934667, 8}, + {2249744775, 8}, {2211018668, 8}, {2172748162, 8}, {2134925265, 8}, + {2097542168, 8}, {2060591247, 8}, {2024065048, 8}, {1987956292, 8}, + {1952257862, 8}, {1916962805, 8}, {1882064321, 8}, {1847555765, 8}, + {1813430637, 8}, {1779682582, 8}, {1746305385, 8}, {1713292966, 8}, + {1680639377, 8}, {1648338801, 8}, {1616385542, 8}, {1584774030, 8}, + {1553498810, 8}, {1522554545, 8}, {1491936009, 8}, {1461638086, 8}, + {1431655766, 8}, {1401984144, 8}, {1372618415, 8}, {1343553873, 8}, + {1314785907, 8}, {1286310003, 8}, {1258121734, 8}, {1230216764, 8}, + {1202590843, 8}, {1175239808, 8}, {1148159575, 8}, {1121346142, 8}, + 
{1094795586, 8}, {1068504060, 8}, {1042467791, 8}, {1016683080, 8}, + {991146300, 8}, {965853890, 8}, {940802361, 8}, {915988286, 8}, + {891408307, 8}, {867059126, 8}, {842937507, 8}, {819040276, 8}, + {795364315, 8}, {771906565, 8}, {748664025, 8}, {725633745, 8}, + {702812831, 8}, {680198441, 8}, {657787785, 8}, {635578121, 8}, + {613566757, 8}, {591751050, 8}, {570128403, 8}, {548696263, 8}, + {527452125, 8}, {506393524, 8}, {485518043, 8}, {464823301, 8}, + {444306962, 8}, {423966729, 8}, {403800345, 8}, {383805589, 8}, + {363980280, 8}, {344322273, 8}, {324829460, 8}, {305499766, 8}, + {286331154, 8}, {267321616, 8}, {248469183, 8}, {229771913, 8}, + {211227900, 8}, {192835267, 8}, {174592167, 8}, {156496785, 8}, + {138547333, 8}, {120742053, 8}, {103079216, 8}, {85557118, 8}, + {68174085, 8}, {50928466, 8}, {33818641, 8}, {16843010, 8}, +}; diff --git a/vp10/common/divide.h b/vp10/common/divide.h new file mode 100644 index 000000000..2f3c35cf2 --- /dev/null +++ b/vp10/common/divide.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VP10_COMMON_DIVIDE_H_ +#define VP10_COMMON_DIVIDE_H_ +// An implementation of the divide by multiply algorithm +// https://gmplib.org/~tege/divcnst-pldi94.pdf + +#include + +#include "./vpx_config.h" +#include "vpx/vpx_integer.h" + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +struct fastdiv_elem { + unsigned mult; + unsigned shift; +}; + +extern const struct fastdiv_elem vp10_fastdiv_tab[256]; + +static INLINE unsigned fastdiv(unsigned x, int y) { + unsigned t = + ((uint64_t)x * vp10_fastdiv_tab[y].mult) >> (sizeof(x) * CHAR_BIT); + return (t + x) >> vp10_fastdiv_tab[y].shift; +} +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus +#endif // VP10_COMMON_DIVIDE_H_ diff --git a/vp10/vp10_common.mk b/vp10/vp10_common.mk index f8c211279..bc3d84aa1 100644 --- a/vp10/vp10_common.mk +++ b/vp10/vp10_common.mk @@ -74,6 +74,9 @@ VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm2d_cfg.h VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d.h VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d.c VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d_cfg.h +VP10_COMMON_SRCS-$(CONFIG_ANS) += common/ans.h +VP10_COMMON_SRCS-$(CONFIG_ANS) += common/divide.h +VP10_COMMON_SRCS-$(CONFIG_ANS) += common/divide.c VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.h VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.c