From: Jim Bankoski Date: Tue, 14 Jul 2015 16:19:01 +0000 (-0700) Subject: Fill buffer speed up X-Git-Tag: v1.5.0~412 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0fe589f21ea75db7fbee2a84749d24b5666ad232;p=libvpx Fill buffer speed up Eliminates the byte by byte read from bool decoder, by reading in a size_t and then shifting it into place. Change-Id: I0ed8c7b6f942847e79cc90105dc1d2b5b3deb0d6 --- diff --git a/vpx_dsp/bitreader.c b/vpx_dsp/bitreader.c index 3eae922ca..4420fadee 100644 --- a/vpx_dsp/bitreader.c +++ b/vpx_dsp/bitreader.c @@ -7,11 +7,15 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ -#include "./bitreader.h" -#include "./prob.h" +#include +#include "./vpx_config.h" + +#include "vpx_dsp/bitreader.h" +#include "vpx_dsp/prob.h" #include "vpx_ports/mem.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_util/endian_inl.h" int vpx_reader_init(vpx_reader *r, const uint8_t *buffer, @@ -39,11 +43,9 @@ void vpx_reader_fill(vpx_reader *r) { const uint8_t *buffer_start = buffer; BD_VALUE value = r->value; int count = r->count; - int shift = BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT); - int loop_end = 0; const size_t bytes_left = buffer_end - buffer; const size_t bits_left = bytes_left * CHAR_BIT; - const int x = (int)(shift + CHAR_BIT - bits_left); + int shift = BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT); if (r->decrypt_cb) { size_t n = MIN(sizeof(r->clear_buffer), bytes_left); @@ -51,17 +53,34 @@ void vpx_reader_fill(vpx_reader *r) { buffer = r->clear_buffer; buffer_start = r->clear_buffer; } + if (bits_left > BD_VALUE_SIZE) { + const int bits = (shift & 0xfffffff8) + CHAR_BIT; + BD_VALUE nv; + BD_VALUE big_endian_values; + memcpy(&big_endian_values, buffer, sizeof(BD_VALUE)); +#if SIZE_MAX == 0xffffffffffffffffULL + big_endian_values = HToBE64(big_endian_values); +#else + big_endian_values = HToBE32(big_endian_values); +#endif + nv = big_endian_values >> (BD_VALUE_SIZE - bits); + count += bits; + buffer += (bits >> 3); + value = r->value | (nv << (shift & 0x7)); + } else { + const int bits_over = (int)(shift + CHAR_BIT - bits_left); + int loop_end = 0; + if (bits_over >= 0) { + count += LOTS_OF_BITS; + loop_end = bits_over; + } - if (x >= 0) { - count += LOTS_OF_BITS; - loop_end = x; - } - - if (x < 0 || bits_left) { - while (shift >= loop_end) { - count += CHAR_BIT; - value |= (BD_VALUE)*buffer++ << shift; - shift -= CHAR_BIT; + if (bits_over < 0 || bits_left) { + while (shift >= loop_end) { + count += CHAR_BIT; + value |= (BD_VALUE)*buffer++ << shift; + shift -= CHAR_BIT; + } } } diff --git a/vpx_util/endian_inl.h b/vpx_util/endian_inl.h new file mode 100644 index 000000000..12cc720a4 --- /dev/null +++ b/vpx_util/endian_inl.h @@ -0,0 +1,117 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Endian related functions. + +#ifndef VPX_UTIL_ENDIAN_INL_H_ +#define VPX_UTIL_ENDIAN_INL_H_ + +#include +#include "./vpx_config.h" +#include "vpx/vpx_integer.h" + +#if defined(__GNUC__) +# define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__) +# define LOCAL_GCC_PREREQ(maj, min) \ + (LOCAL_GCC_VERSION >= (((maj) << 8) | (min))) +#else +# define LOCAL_GCC_VERSION 0 +# define LOCAL_GCC_PREREQ(maj, min) 0 +#endif + +#ifdef __clang__ +# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__) +# define LOCAL_CLANG_PREREQ(maj, min) \ + (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min))) +#else +# define LOCAL_CLANG_VERSION 0 +# define LOCAL_CLANG_PREREQ(maj, min) 0 +#endif // __clang__ + +// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__) +#if !defined(WORDS_BIGENDIAN) && \ + (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \ + (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) +#define WORDS_BIGENDIAN +#endif + +#if defined(WORDS_BIGENDIAN) +#define HToLE32 BSwap32 +#define HToLE16 BSwap16 +#define HToBE64(x) (x) +#define HToBE32(x) (x) +#else +#define HToLE32(x) (x) +#define HToLE16(x) (x) +#define HToBE64(X) BSwap64(X) +#define HToBE32(X) BSwap32(X) +#endif + +// clang-3.3 and gcc-4.3 have builtin functions for swap32/swap64 +#if LOCAL_GCC_PREREQ(4, 3) || LOCAL_CLANG_PREREQ(3, 3) +#define HAVE_BUILTIN_BSWAP32 +#define HAVE_BUILTIN_BSWAP64 +#endif +// clang-3.3 and gcc-4.8 have a builtin function for swap16 +#if LOCAL_GCC_PREREQ(4, 8) || LOCAL_CLANG_PREREQ(3, 3) +#define HAVE_BUILTIN_BSWAP16 +#endif + +static INLINE uint16_t BSwap16(uint16_t x) { +#if defined(HAVE_BUILTIN_BSWAP16) + return __builtin_bswap16(x); +#elif defined(_MSC_VER) + return _byteswap_ushort(x); +#else + // gcc will recognize a 'rorw $8, ...' here: + return (x >> 8) | ((x & 0xff) << 8); +#endif // HAVE_BUILTIN_BSWAP16 +} + +static INLINE uint32_t BSwap32(uint32_t x) { +#if HAVE_MIPS32 + uint32_t ret; + __asm__ volatile ( + "wsbh %[ret], %[x] \n\t" + "rotr %[ret], %[ret], 16 \n\t" + : [ret]"=r"(ret) + : [x]"r"(x) + ); + return ret; +#elif defined(HAVE_BUILTIN_BSWAP32) + return __builtin_bswap32(x); +#elif defined(__i386__) || defined(__x86_64__) + uint32_t swapped_bytes; + __asm__ volatile("bswap %0" : "=r"(swapped_bytes) : "0"(x)); + return swapped_bytes; +#elif defined(_MSC_VER) + return (uint32_t)_byteswap_ulong(x); +#else + return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24); +#endif // HAVE_BUILTIN_BSWAP32 +} + +static INLINE uint64_t BSwap64(uint64_t x) { +#if defined(HAVE_BUILTIN_BSWAP64) + return __builtin_bswap64(x); +#elif defined(__x86_64__) + uint64_t swapped_bytes; + __asm__ volatile("bswapq %0" : "=r"(swapped_bytes) : "0"(x)); + return swapped_bytes; +#elif defined(_MSC_VER) + return (uint64_t)_byteswap_uint64(x); +#else // generic code for swapping 64-bit values (suggested by bdb@) + x = ((x & 0xffffffff00000000ull) >> 32) | ((x & 0x00000000ffffffffull) << 32); + x = ((x & 0xffff0000ffff0000ull) >> 16) | ((x & 0x0000ffff0000ffffull) << 16); + x = ((x & 0xff00ff00ff00ff00ull) >> 8) | ((x & 0x00ff00ff00ff00ffull) << 8); + return x; +#endif // HAVE_BUILTIN_BSWAP64 +} + +#endif // VPX_UTIL_ENDIAN_INL_H_ diff --git a/vpx_util/vpx_util.mk b/vpx_util/vpx_util.mk index 116112548..c0ef8d336 100644 --- a/vpx_util/vpx_util.mk +++ b/vpx_util/vpx_util.mk @@ -11,3 +11,4 @@ UTIL_SRCS-yes += vpx_util.mk UTIL_SRCS-yes += vpx_thread.c UTIL_SRCS-yes += vpx_thread.h +UTIL_SRCS-yes += endian_inl.h