From eb64ea3e89383c262593b91b6d891d6644bf175a Mon Sep 17 00:00:00 2001 From: James Zern Date: Tue, 29 Mar 2016 21:04:38 -0700 Subject: [PATCH] vpx_fdct16x16_1_c/msa: fix accumulator overflow tran_low_t is only signed 16-bits in non-high-bitdepth mode Change-Id: Ie02b5caf2658e8e71f995c17dd5ce666a4d64918 --- test/dct16x16_test.cc | 78 +++++++++++++++++++++++++++++++++++++ vpx_dsp/fwd_txfm.c | 4 +- vpx_dsp/mips/fwd_txfm_msa.c | 12 +++--- 3 files changed, 85 insertions(+), 9 deletions(-) diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc index d6cc5e443..c4dfaf345 100644 --- a/test/dct16x16_test.cc +++ b/test/dct16x16_test.cc @@ -792,6 +792,67 @@ TEST_P(InvTrans16x16DCT, CompareReference) { CompareInvReference(ref_txfm_, thresh_); } +class PartialTrans16x16Test + : public ::testing::TestWithParam< + std::tr1::tuple > { + public: + virtual ~PartialTrans16x16Test() {} + virtual void SetUp() { + fwd_txfm_ = GET_PARAM(0); + bit_depth_ = GET_PARAM(1); + } + + virtual void TearDown() { libvpx_test::ClearSystemState(); } + + protected: + vpx_bit_depth_t bit_depth_; + FdctFunc fwd_txfm_; +}; + +TEST_P(PartialTrans16x16Test, Extremes) { +#if CONFIG_VP9_HIGHBITDEPTH + const int16_t maxval = + static_cast(clip_pixel_highbd(1 << 30, bit_depth_)); +#else + const int16_t maxval = 255; +#endif + const int minval = -maxval; + DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]); + + for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval; + output[0] = 0; + ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16)); + EXPECT_EQ((maxval * kNumCoeffs) >> 1, output[0]); + + for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval; + output[0] = 0; + ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16)); + EXPECT_EQ((minval * kNumCoeffs) >> 1, output[0]); +} + +TEST_P(PartialTrans16x16Test, Random) { +#if CONFIG_VP9_HIGHBITDEPTH + const int16_t maxval = + static_cast(clip_pixel_highbd(1 << 30, bit_depth_)); +#else + const int16_t maxval = 255; +#endif + DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]); + DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]); + ACMRandom rnd(ACMRandom::DeterministicSeed()); + + int sum = 0; + for (int i = 0; i < kNumCoeffs; ++i) { + const int val = (i & 1) ? -rnd(maxval + 1) : rnd(maxval + 1); + input[i] = val; + sum += val; + } + output[0] = 0; + ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16)); + EXPECT_EQ(sum >> 1, output[0]); +} + using std::tr1::make_tuple; #if CONFIG_VP9_HIGHBITDEPTH @@ -824,6 +885,11 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8), make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8), make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8))); +INSTANTIATE_TEST_CASE_P( + C, PartialTrans16x16Test, + ::testing::Values(make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_8), + make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_10), + make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_12))); #else INSTANTIATE_TEST_CASE_P( C, Trans16x16HT, @@ -832,6 +898,9 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8), make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8), make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8))); +INSTANTIATE_TEST_CASE_P(C, PartialTrans16x16Test, + ::testing::Values(make_tuple(&vpx_fdct16x16_1_c, + VPX_BITS_8))); #endif // CONFIG_VP9_HIGHBITDEPTH #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -859,6 +928,9 @@ INSTANTIATE_TEST_CASE_P( VPX_BITS_8), make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3, VPX_BITS_8))); +INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test, + ::testing::Values(make_tuple(&vpx_fdct16x16_1_sse2, + VPX_BITS_8))); #endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -896,6 +968,9 @@ INSTANTIATE_TEST_CASE_P( &idct16x16_10_add_12_sse2, 3167, VPX_BITS_12), make_tuple(&idct16x16_12, &idct16x16_256_add_12_sse2, 3167, VPX_BITS_12))); +INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test, + ::testing::Values(make_tuple(&vpx_fdct16x16_1_sse2, + VPX_BITS_8))); #endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE @@ -912,5 +987,8 @@ INSTANTIATE_TEST_CASE_P( make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 2, VPX_BITS_8), make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 3, VPX_BITS_8))); +INSTANTIATE_TEST_CASE_P(MSA, PartialTrans16x16Test, + ::testing::Values(make_tuple(&vpx_fdct16x16_1_msa, + VPX_BITS_8))); #endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE } // namespace diff --git a/vpx_dsp/fwd_txfm.c b/vpx_dsp/fwd_txfm.c index 39596b1e4..a5802e1f9 100644 --- a/vpx_dsp/fwd_txfm.c +++ b/vpx_dsp/fwd_txfm.c @@ -365,12 +365,12 @@ void vpx_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) { void vpx_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride) { int r, c; - tran_low_t sum = 0; + int sum = 0; for (r = 0; r < 16; ++r) for (c = 0; c < 16; ++c) sum += input[r * stride + c]; - output[0] = sum >> 1; + output[0] = (tran_low_t)(sum >> 1); } static INLINE tran_high_t dct_32_round(tran_high_t input) { diff --git a/vpx_dsp/mips/fwd_txfm_msa.c b/vpx_dsp/mips/fwd_txfm_msa.c index f66dd5fce..0dd141f41 100644 --- a/vpx_dsp/mips/fwd_txfm_msa.c +++ b/vpx_dsp/mips/fwd_txfm_msa.c @@ -237,11 +237,9 @@ void vpx_fdct16x16_msa(const int16_t *input, int16_t *output, } void vpx_fdct16x16_1_msa(const int16_t *input, int16_t *out, int32_t stride) { - out[1] = 0; - - out[0] = LD_HADD(input, stride); - out[0] += LD_HADD(input + 8, stride); - out[0] += LD_HADD(input + 16 * 8, stride); - out[0] += LD_HADD(input + 16 * 8 + 8, stride); - out[0] >>= 1; + int sum = LD_HADD(input, stride); + sum += LD_HADD(input + 8, stride); + sum += LD_HADD(input + 16 * 8, stride); + sum += LD_HADD(input + 16 * 8 + 8, stride); + out[0] = (int16_t)(sum >> 1); } -- 2.40.0