granicus.if.org Git - libvpx/commitdiff
vpx_fdct32x32_1_c: fix accumulator overflow
author: James Zern <jzern@google.com>
Wed, 30 Mar 2016 04:04:38 +0000 (21:04 -0700)
committer: James Zern <jzern@google.com>
Wed, 30 Mar 2016 22:20:20 +0000 (15:20 -0700)
tran_low_t is only 16 bits wide in non-high-bitdepth mode, so accumulate the 32x32 sum in an int instead.

Change-Id: Ifc06110c95e86e6d790c44250d52a538b2e9713b

test/dct32x32_test.cc
vpx_dsp/fwd_txfm.c

index 2dac10bc1f0deb2c310898985bbf4502d5f1294a..407d9f0b08e0e2a95c2a95920c5de30865d4a804 100644 (file)
@@ -305,6 +305,45 @@ TEST_P(Trans32x32Test, InverseAccuracy) {
   }
 }
 
+class PartialTrans32x32Test  // Parameterized fixture: each instance carries one forward transform function and one bit depth.
+    : public ::testing::TestWithParam<
+          std::tr1::tuple<FwdTxfmFunc, vpx_bit_depth_t> > {  // param tuple = (function under test, bit depth)
+ public:
+  virtual ~PartialTrans32x32Test() {}
+  virtual void SetUp() {  // Unpack the parameter tuple into the members below.
+    fwd_txfm_ = GET_PARAM(0);
+    bit_depth_ = GET_PARAM(1);
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  vpx_bit_depth_t bit_depth_;  // Taken from GET_PARAM(1).
+  FwdTxfmFunc fwd_txfm_;  // Function under test, taken from GET_PARAM(0).
+};
+
+TEST_P(PartialTrans32x32Test, Extremes) {  // Fills the input with the max, then the min, value and checks output[0] each time.
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int16_t maxval =
+      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));  // presumably clamps to the largest pixel value for bit_depth_ (helper not visible here)
+#else
+  const int16_t maxval = 255;  // 2^8 - 1
+#endif
+  const int minval = -maxval;
+  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
+
+  for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval;
+  output[0] = 0;  // reset so a stale value cannot satisfy the check below
+  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
+  EXPECT_EQ((maxval * kNumCoeffs) >> 3, output[0]);  // expected: sum of all inputs, shifted right by 3
+
+  for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval;
+  output[0] = 0;  // reset again before the second run
+  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
+  EXPECT_EQ((minval * kNumCoeffs) >> 3, output[0]);  // same check with the all-minimum input
+}
+
 using std::tr1::make_tuple;
 
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -323,6 +362,11 @@ INSTANTIATE_TEST_CASE_P(
                    &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
         make_tuple(&vpx_fdct32x32_rd_c,
                    &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(  // High-bitdepth build: run the C function once per bit depth (8, 10, 12).
+    C, PartialTrans32x32Test,
+    ::testing::Values(make_tuple(vpx_highbd_fdct32x32_1_c, VPX_BITS_8),
+                      make_tuple(vpx_highbd_fdct32x32_1_c, VPX_BITS_10),
+                      make_tuple(vpx_highbd_fdct32x32_1_c, VPX_BITS_12)));
 #else
 INSTANTIATE_TEST_CASE_P(
     C, Trans32x32Test,
@@ -331,6 +375,9 @@ INSTANTIATE_TEST_CASE_P(
                    &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
         make_tuple(&vpx_fdct32x32_rd_c,
                    &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(C, PartialTrans32x32Test,  // Non-high-bitdepth build: C function at 8-bit only.
+                        ::testing::Values(make_tuple(vpx_fdct32x32_1_c,
+                                                     VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@@ -351,6 +398,9 @@ INSTANTIATE_TEST_CASE_P(
                    &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
         make_tuple(&vpx_fdct32x32_rd_sse2,
                    &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,  // SSE2 function at 8-bit, non-high-bitdepth build.
+                        ::testing::Values(make_tuple(vpx_fdct32x32_1_sse2,
+                                                     VPX_BITS_8)));
 #endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 
 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@@ -367,6 +417,9 @@ INSTANTIATE_TEST_CASE_P(
                    VPX_BITS_8),
         make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1,
                    VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,  // NOTE(review): high-bitdepth build, yet only VPX_BITS_8 is instantiated here (the C instantiation covers 8/10/12) - confirm this is intended.
+                        ::testing::Values(make_tuple(vpx_fdct32x32_1_sse2,
+                                                     VPX_BITS_8)));
 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 
 #if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@@ -387,5 +440,8 @@ INSTANTIATE_TEST_CASE_P(
                    &vpx_idct32x32_1024_add_msa, 0, VPX_BITS_8),
         make_tuple(&vpx_fdct32x32_rd_msa,
                    &vpx_idct32x32_1024_add_msa, 1, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(MSA, PartialTrans32x32Test,  // MSA function at 8-bit, non-high-bitdepth build.
+                        ::testing::Values(make_tuple(vpx_fdct32x32_1_msa,
+                                                     VPX_BITS_8)));
 #endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 }  // namespace
index 7baaa8b0d0467c66bd19e60e4c6e5e8831d607b2..58d5f0c8988ccfcf9bb39d6da66877415c3ad455 100644 (file)
@@ -771,12 +771,12 @@ void vpx_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) {
 
 void vpx_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride) {
   int r, c;
-  tran_low_t sum = 0;
+  int sum = 0;  // int accumulator: per the commit message, tran_low_t is only 16 bits in non-high-bitdepth mode
   for (r = 0; r < 32; ++r)
     for (c = 0; c < 32; ++c)
       sum += input[r * stride + c];
 
-  output[0] = sum >> 3;
+  output[0] = (tran_low_t)(sum >> 3);  // narrow to tran_low_t only after the shift
   output[1] = 0;
 }