From: Scott LaVarnway Date: Wed, 20 Jun 2018 22:10:46 +0000 (-0700) Subject: Add vpx_highbd_avg_8x8, vpx_highbd_avg_4x4 X-Git-Tag: v1.8.0~582^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a3c2774126a00674ea70fe6e2d722ea36b9f3527;p=libvpx Add vpx_highbd_avg_8x8, vpx_highbd_avg_4x4 BUG=webm:1537 Change-Id: I5f216f35436189b67d9f350991f41ed31431d4fe --- diff --git a/test/avg_test.cc b/test/avg_test.cc index 9c6410b1f..377e54209 100644 --- a/test/avg_test.cc +++ b/test/avg_test.cc @@ -22,40 +22,41 @@ #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/util.h" +#include "vpx/vpx_codec.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/vpx_timer.h" using libvpx_test::ACMRandom; namespace { + +template class AverageTestBase : public ::testing::Test { public: AverageTestBase(int width, int height) : width_(width), height_(height) {} - static void SetUpTestCase() { - source_data_ = reinterpret_cast( - vpx_memalign(kDataAlignment, kDataBlockSize)); - } - - static void TearDownTestCase() { + virtual void TearDown() { vpx_free(source_data_); source_data_ = NULL; + libvpx_test::ClearSystemState(); } - virtual void TearDown() { libvpx_test::ClearSystemState(); } - protected: // Handle blocks up to 4 blocks 64x64 with stride up to 128 static const int kDataAlignment = 16; static const int kDataBlockSize = 64 * 128; virtual void SetUp() { + source_data_ = reinterpret_cast( + vpx_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0]))); + ASSERT_TRUE(source_data_ != NULL); source_stride_ = (width_ + 31) & ~31; + bit_depth_ = 8; rnd_.Reset(ACMRandom::DeterministicSeed()); } // Sum Pixels - static unsigned int ReferenceAverage8x8(const uint8_t *source, int pitch) { + static unsigned int ReferenceAverage8x8(const Pixel *source, int pitch) { unsigned int average = 0; for (int h = 0; h < 8; ++h) { for (int w = 0; w < 8; ++w) average += source[h * pitch + w]; @@ -63,7 +64,7 @@ class AverageTestBase : public ::testing::Test { return ((average + 32) >> 6); } - static unsigned int ReferenceAverage4x4(const uint8_t *source, int pitch) { + static unsigned int ReferenceAverage4x4(const Pixel *source, int pitch) { unsigned int average = 0; for (int h = 0; h < 4; ++h) { for (int w = 0; w < 4; ++w) average += source[h * pitch + w]; @@ -71,7 +72,7 @@ class AverageTestBase : public ::testing::Test { return ((average + 8) >> 4); } - void FillConstant(uint8_t fill_constant) { + void FillConstant(Pixel fill_constant) { for (int i = 0; i < width_ * height_; ++i) { source_data_[i] = fill_constant; } @@ -79,13 +80,14 @@ class AverageTestBase : public ::testing::Test { void FillRandom() { for (int i = 0; i < width_ * height_; ++i) { - source_data_[i] = rnd_.Rand8(); + source_data_[i] = rnd_.Rand16() & ((1 << bit_depth_) - 1); } } int width_, height_; - static uint8_t *source_data_; + Pixel *source_data_; int source_stride_; + int bit_depth_; ACMRandom rnd_; }; @@ -93,7 +95,7 @@ typedef unsigned int (*AverageFunction)(const uint8_t *s, int pitch); typedef ::testing::tuple AvgFunc; -class AverageTest : public AverageTestBase, +class AverageTest : public AverageTestBase, public ::testing::WithParamInterface { public: AverageTest() : AverageTestBase(GET_PARAM(0), GET_PARAM(1)) {} @@ -119,12 +121,40 @@ class AverageTest : public AverageTestBase, } }; +#if CONFIG_VP9_HIGHBITDEPTH +class AverageTestHBD : public AverageTestBase, + public ::testing::WithParamInterface { + public: + AverageTestHBD() : AverageTestBase(GET_PARAM(0), GET_PARAM(1)) {} + + protected: + void CheckAverages() { + const int block_size = GET_PARAM(3); + unsigned int expected = 0; + if (block_size == 8) { + expected = + ReferenceAverage8x8(source_data_ + GET_PARAM(2), source_stride_); + } else if (block_size == 4) { + expected = + ReferenceAverage4x4(source_data_ + GET_PARAM(2), source_stride_); + } + + ASM_REGISTER_STATE_CHECK(GET_PARAM(4)( + CONVERT_TO_BYTEPTR(source_data_ + GET_PARAM(2)), source_stride_)); + unsigned int actual = GET_PARAM(4)( + CONVERT_TO_BYTEPTR(source_data_ + GET_PARAM(2)), source_stride_); + + EXPECT_EQ(expected, actual); + } +}; +#endif // CONFIG_VP9_HIGHBITDEPTH + typedef void (*IntProRowFunc)(int16_t hbuf[16], uint8_t const *ref, const int ref_stride, const int height); typedef ::testing::tuple IntProRowParam; -class IntProRowTest : public AverageTestBase, +class IntProRowTest : public AverageTestBase, public ::testing::WithParamInterface { public: IntProRowTest() @@ -135,6 +165,10 @@ class IntProRowTest : public AverageTestBase, protected: virtual void SetUp() { + source_data_ = reinterpret_cast( + vpx_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0]))); + ASSERT_TRUE(source_data_ != NULL); + hbuf_asm_ = reinterpret_cast( vpx_memalign(kDataAlignment, sizeof(*hbuf_asm_) * 16)); hbuf_c_ = reinterpret_cast( @@ -142,6 +176,8 @@ class IntProRowTest : public AverageTestBase, } virtual void TearDown() { + vpx_free(source_data_); + source_data_ = NULL; vpx_free(hbuf_c_); hbuf_c_ = NULL; vpx_free(hbuf_asm_); @@ -166,7 +202,7 @@ typedef int16_t (*IntProColFunc)(uint8_t const *ref, const int width); typedef ::testing::tuple IntProColParam; -class IntProColTest : public AverageTestBase, +class IntProColTest : public AverageTestBase, public ::testing::WithParamInterface { public: IntProColTest() : AverageTestBase(GET_PARAM(0), 1), sum_asm_(0), sum_c_(0) { @@ -288,8 +324,6 @@ class BlockErrorTestFP ACMRandom rnd_; }; -uint8_t *AverageTestBase::source_data_ = NULL; - TEST_P(AverageTest, MinValue) { FillConstant(0); CheckAverages(); @@ -308,6 +342,27 @@ TEST_P(AverageTest, Random) { CheckAverages(); } } +#if CONFIG_VP9_HIGHBITDEPTH +TEST_P(AverageTestHBD, MinValue) { + FillConstant(0); + CheckAverages(); +} + +TEST_P(AverageTestHBD, MaxValue) { + FillConstant((1 << VPX_BITS_12) - 1); + CheckAverages(); +} + +TEST_P(AverageTestHBD, Random) { + bit_depth_ = VPX_BITS_12; + // The reference frame, but not the source frame, may be unaligned for + // certain types of searches. + for (int i = 0; i < 1000; i++) { + FillRandom(); + CheckAverages(); + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH TEST_P(IntProRowTest, MinValue) { FillConstant(0); @@ -435,6 +490,20 @@ INSTANTIATE_TEST_CASE_P( ::testing::Values(make_tuple(16, 16, 1, 8, &vpx_avg_8x8_c), make_tuple(16, 16, 1, 4, &vpx_avg_4x4_c))); +#if CONFIG_VP9_HIGHBITDEPTH +INSTANTIATE_TEST_CASE_P( + C, AverageTestHBD, + ::testing::Values(make_tuple(16, 16, 1, 8, &vpx_highbd_avg_8x8_c), + make_tuple(16, 16, 1, 4, &vpx_highbd_avg_4x4_c))); + +#if HAVE_SSE2 +INSTANTIATE_TEST_CASE_P( + SSE2, AverageTestHBD, + ::testing::Values(make_tuple(16, 16, 1, 8, &vpx_highbd_avg_8x8_sse2), + make_tuple(16, 16, 1, 4, &vpx_highbd_avg_4x4_sse2))); +#endif // HAVE_SSE2 +#endif // CONFIG_VP9_HIGHBITDEPTH + INSTANTIATE_TEST_CASE_P(C, SatdTest, ::testing::Values(make_tuple(16, &vpx_satd_c), make_tuple(64, &vpx_satd_c), diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 9f3e268cc..0f33562cc 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -985,7 +985,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # Avg # add_proto qw/unsigned int vpx_highbd_avg_8x8/, "const uint8_t *, int p"; + specialize qw/vpx_highbd_avg_8x8 sse2/; + add_proto qw/unsigned int vpx_highbd_avg_4x4/, "const uint8_t *, int p"; + specialize qw/vpx_highbd_avg_4x4 sse2/; + add_proto qw/void vpx_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max"; add_proto qw/unsigned int vpx_highbd_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; diff --git a/vpx_dsp/x86/avg_intrin_sse2.c b/vpx_dsp/x86/avg_intrin_sse2.c index a235ba41d..0362c63c5 100644 --- a/vpx_dsp/x86/avg_intrin_sse2.c +++ b/vpx_dsp/x86/avg_intrin_sse2.c @@ -138,6 +138,56 @@ unsigned int vpx_avg_4x4_sse2(const uint8_t *s, int p) { return (avg + 8) >> 4; } +#if CONFIG_VP9_HIGHBITDEPTH +unsigned int vpx_highbd_avg_8x8_sse2(const uint8_t *s8, int p) { + __m128i s0, s1; + unsigned int avg; + const uint16_t *s = CONVERT_TO_SHORTPTR(s8); + const __m128i zero = _mm_setzero_si128(); + s0 = _mm_loadu_si128((const __m128i *)(s)); + s1 = _mm_loadu_si128((const __m128i *)(s + p)); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_loadu_si128((const __m128i *)(s + 2 * p)); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_loadu_si128((const __m128i *)(s + 3 * p)); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_loadu_si128((const __m128i *)(s + 4 * p)); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_loadu_si128((const __m128i *)(s + 5 * p)); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_loadu_si128((const __m128i *)(s + 6 * p)); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_loadu_si128((const __m128i *)(s + 7 * p)); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_unpackhi_epi16(s0, zero); + s0 = _mm_unpacklo_epi16(s0, zero); + s0 = _mm_add_epi32(s0, s1); + s0 = _mm_add_epi32(s0, _mm_srli_si128(s0, 8)); + s0 = _mm_add_epi32(s0, _mm_srli_si128(s0, 4)); + avg = _mm_cvtsi128_si32(s0); + + return (avg + 32) >> 6; +} + +unsigned int vpx_highbd_avg_4x4_sse2(const uint8_t *s8, int p) { + __m128i s0, s1; + unsigned int avg; + const uint16_t *s = CONVERT_TO_SHORTPTR(s8); + s0 = _mm_loadl_epi64((const __m128i *)(s)); + s1 = _mm_loadl_epi64((const __m128i *)(s + p)); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_loadl_epi64((const __m128i *)(s + 2 * p)); + s0 = _mm_adds_epu16(s0, s1); + s1 = _mm_loadl_epi64((const __m128i *)(s + 3 * p)); + s0 = _mm_adds_epu16(s0, s1); + s0 = _mm_add_epi16(s0, _mm_srli_si128(s0, 4)); + s0 = _mm_add_epi16(s0, _mm_srli_si128(s0, 2)); + avg = _mm_extract_epi16(s0, 0); + + return (avg + 8) >> 4; +} +#endif // CONFIG_VP9_HIGHBITDEPTH + static void hadamard_col8_sse2(__m128i *in, int iter) { __m128i a0 = in[0]; __m128i a1 = in[1];