Fix a bug in vp9_iht8x8_64_add_neon()

author Linfeng Zhang <linfengz@google.com>

Mon, 5 Mar 2018 23:16:49 +0000 (15:16 -0800)

committer Linfeng Zhang <linfengz@google.com>

Mon, 5 Mar 2018 23:33:37 +0000 (15:33 -0800)
author Linfeng Zhang <linfengz@google.com>
Mon, 5 Mar 2018 23:16:49 +0000 (15:16 -0800)
committer Linfeng Zhang <linfengz@google.com>
Mon, 5 Mar 2018 23:33:37 +0000 (15:33 -0800)
diff --git a/test/dct_test.cc b/test/dct_test.cc

index 2a6fccb67b2f30ff96483dd2d88c61b9b379bac4..bc1afbbe226fb99b3712edcba8f603cd22ca2d3b 100644 (file)
--- a/test/dct_test.cc
+++ b/test/dct_test.cc
@@ -639,10 +639,10 @@ static const FuncInfo ht_neon_func_info[] = {
  #endif
  #endif
    { &vp9_fht4x4_c, &iht_wrapper<vp9_iht4x4_16_add_neon>, 4, 1 },
-// TODO(linfengz): reenable these functions once test vector failures are
-// addressed.
-#if 0
+  // TODO(linfengz): reenable these functions once test vector failures are
+  // addressed.
    { &vp9_fht8x8_c, &iht_wrapper<vp9_iht8x8_64_add_neon>, 8, 1 },
+#if 0
    { &vp9_fht16x16_c, &iht_wrapper<vp9_iht16x16_256_add_neon>, 16, 1 }
  #endif
  };
diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc

index 15033dbc1f531f4b302093925d9fcd0e9fd29ee1..d1b49dfa73606948aa75efb8666205a6fcdde1d7 100644 (file)
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -675,9 +675,8 @@ INSTANTIATE_TEST_CASE_P(NEON, FwdTrans8x8DCT,
                          ::testing::Values(make_tuple(&vpx_fdct8x8_neon,
                                                       &vpx_idct8x8_64_add_neon,
                                                       0, VPX_BITS_8)));
-// TODO(linfengz): reenable these functions once test vector failures are
-// addressed.
-#if 0   // !CONFIG_VP9_HIGHBITDEPTH
+
+#if !CONFIG_VP9_HIGHBITDEPTH
  INSTANTIATE_TEST_CASE_P(
      NEON, FwdTrans8x8HT,
      ::testing::Values(
diff --git a/vp9/common/arm/neon/vp9_iht_neon.h b/vp9/common/arm/neon/vp9_iht_neon.h

index e918ebc7fffa682f03ac8f95a88a18ddc5ddb0ad..965eff36b1c84cde2afdb434dd93c0627d62b449 100644 (file)
--- a/vp9/common/arm/neon/vp9_iht_neon.h
+++ b/vp9/common/arm/neon/vp9_iht_neon.h
@@ -59,14 +59,17 @@ static INLINE void iadst4(int16x8_t *const io) {
  
  static INLINE void iadst_half_butterfly_neon(int16x8_t *const x,
                                               const int16x4_t c) {
-  const int16x8_t sum = vaddq_s16(x[0], x[1]);
-  const int16x8_t sub = vsubq_s16(x[0], x[1]);
+  // Don't add/sub before multiply, which will overflow in iadst8.
+  const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(x[0]), c, 0);
+  const int32x4_t x0_hi = vmull_lane_s16(vget_high_s16(x[0]), c, 0);
+  const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(x[1]), c, 0);
+  const int32x4_t x1_hi = vmull_lane_s16(vget_high_s16(x[1]), c, 0);
    int32x4_t t0[2], t1[2];
  
-  t0[0] = vmull_lane_s16(vget_low_s16(sum), c, 0);
-  t0[1] = vmull_lane_s16(vget_high_s16(sum), c, 0);
-  t1[0] = vmull_lane_s16(vget_low_s16(sub), c, 0);
-  t1[1] = vmull_lane_s16(vget_high_s16(sub), c, 0);
+  t0[0] = vaddq_s32(x0_lo, x1_lo);
+  t0[1] = vaddq_s32(x0_hi, x1_hi);
+  t1[0] = vsubq_s32(x0_lo, x1_lo);
+  t1[1] = vsubq_s32(x0_hi, x1_hi);
    x[0] = dct_const_round_shift_low_8(t0);
    x[1] = dct_const_round_shift_low_8(t1);
  }
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl

index 274edfd447fa1f48c41bdb0294079a2d5dd5df73..2b15b661cd40b63364ac215b344eb0d58c82356e 100644 (file)
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -68,7 +68,7 @@ if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
    # Note that there are more specializations appended when
    # CONFIG_VP9_HIGHBITDEPTH is off.
    specialize qw/vp9_iht4x4_16_add neon sse2/;
-  specialize qw/vp9_iht8x8_64_add sse2/;
+  specialize qw/vp9_iht8x8_64_add neon sse2/;
    specialize qw/vp9_iht16x16_256_add sse2/;
    if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") {
      # Note that these specializations are appended to the above ones.
author	Linfeng Zhang <linfengz@google.com>
	Mon, 5 Mar 2018 23:16:49 +0000 (15:16 -0800)
committer	Linfeng Zhang <linfengz@google.com>
	Mon, 5 Mar 2018 23:33:37 +0000 (15:33 -0800)
test/dct_test.cc		patch | blob | history
test/fdct8x8_test.cc		patch | blob | history
vp9/common/arm/neon/vp9_iht_neon.h		patch | blob | history
vp9/common/vp9_rtcd_defs.pl		patch | blob | history