From 0269df41c1916f6c6df2a445d291b4a3c31922f6 Mon Sep 17 00:00:00 2001 From: James Zern Date: Tue, 29 Mar 2016 21:04:38 -0700 Subject: [PATCH] vpx_fdct32x32_1_c: fix accumulator overflow tran_low_t is only 16-bits in non-high-bitdepth mode Change-Id: Ifc06110c95e86e6d790c44250d52a538b2e9713b --- test/dct32x32_test.cc | 56 +++++++++++++++++++++++++++++++++++++++++++ vpx_dsp/fwd_txfm.c | 4 ++-- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc index 2dac10bc1..407d9f0b0 100644 --- a/test/dct32x32_test.cc +++ b/test/dct32x32_test.cc @@ -305,6 +305,45 @@ TEST_P(Trans32x32Test, InverseAccuracy) { } } +class PartialTrans32x32Test + : public ::testing::TestWithParam< + std::tr1::tuple<FwdTxfmFunc, vpx_bit_depth_t> > { + public: + virtual ~PartialTrans32x32Test() {} + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + bit_depth_ = GET_PARAM(1); + } + + virtual void TearDown() { libvpx_test::ClearSystemState(); } + + protected: + vpx_bit_depth_t bit_depth_; + FwdTxfmFunc fwd_txfm_; +}; + +TEST_P(PartialTrans32x32Test, Extremes) { +#if CONFIG_VP9_HIGHBITDEPTH + const int16_t maxval = + static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_)); +#else + const int16_t maxval = 255; +#endif + const int minval = -maxval; + DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]); + + for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval; + output[0] = 0; + ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32)); + EXPECT_EQ((maxval * kNumCoeffs) >> 3, output[0]); + + for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval; + output[0] = 0; + ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32)); + EXPECT_EQ((minval * kNumCoeffs) >> 3, output[0]); +} + using std::tr1::make_tuple; #if CONFIG_VP9_HIGHBITDEPTH @@ -323,6 +362,11 @@ INSTANTIATE_TEST_CASE_P( &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8))); +INSTANTIATE_TEST_CASE_P( + C, 
PartialTrans32x32Test, + ::testing::Values(make_tuple(vpx_highbd_fdct32x32_1_c, VPX_BITS_8), + make_tuple(vpx_highbd_fdct32x32_1_c, VPX_BITS_10), + make_tuple(vpx_highbd_fdct32x32_1_c, VPX_BITS_12))); #else INSTANTIATE_TEST_CASE_P( C, Trans32x32Test, @@ -331,6 +375,9 @@ INSTANTIATE_TEST_CASE_P( &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8))); +INSTANTIATE_TEST_CASE_P(C, PartialTrans32x32Test, + ::testing::Values(make_tuple(vpx_fdct32x32_1_c, + VPX_BITS_8))); #endif // CONFIG_VP9_HIGHBITDEPTH #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -351,6 +398,9 @@ INSTANTIATE_TEST_CASE_P( &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8))); +INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test, + ::testing::Values(make_tuple(vpx_fdct32x32_1_sse2, + VPX_BITS_8))); #endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -367,6 +417,9 @@ INSTANTIATE_TEST_CASE_P( VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8))); +INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test, + ::testing::Values(make_tuple(vpx_fdct32x32_1_sse2, + VPX_BITS_8))); #endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE #if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -387,5 +440,8 @@ INSTANTIATE_TEST_CASE_P( &vpx_idct32x32_1024_add_msa, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_msa, &vpx_idct32x32_1024_add_msa, 1, VPX_BITS_8))); +INSTANTIATE_TEST_CASE_P(MSA, PartialTrans32x32Test, + ::testing::Values(make_tuple(vpx_fdct32x32_1_msa, + VPX_BITS_8))); #endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE } // namespace diff --git a/vpx_dsp/fwd_txfm.c b/vpx_dsp/fwd_txfm.c index 7baaa8b0d..58d5f0c89 100644 --- a/vpx_dsp/fwd_txfm.c +++ 
b/vpx_dsp/fwd_txfm.c @@ -771,12 +771,12 @@ void vpx_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) { void vpx_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride) { int r, c; - tran_low_t sum = 0; + int sum = 0; for (r = 0; r < 32; ++r) for (c = 0; c < 32; ++c) sum += input[r * stride + c]; - output[0] = sum >> 3; + output[0] = (tran_low_t)(sum >> 3); output[1] = 0; } -- 2.40.0