convolve: support larger blocks, fix asm saturation bug

author John Koleszar <jkoleszar@google.com>

Thu, 18 Apr 2013 20:05:38 +0000 (13:05 -0700)

committer John Koleszar <jkoleszar@google.com>

Thu, 18 Apr 2013 20:57:59 +0000 (13:57 -0700)
author John Koleszar <jkoleszar@google.com>
Thu, 18 Apr 2013 20:05:38 +0000 (13:05 -0700)
committer John Koleszar <jkoleszar@google.com>
Thu, 18 Apr 2013 20:57:59 +0000 (13:57 -0700)
diff --git a/test/acm_random.h b/test/acm_random.h

index 514894edaf7758289c42925a28d09f85e92b10aa..84c6c75297983e386028befb57660b69c4acd788 100644 (file)
--- a/test/acm_random.h
+++ b/test/acm_random.h
@@ -35,6 +35,13 @@ class ACMRandom {
      return (rand() >> 8) & 0xff;
    }
  
+  uint8_t Rand8Extremes(void) {
+    // Saturation stress input: r << 4 wraps to a multiple of 16 in [0, 240]
+    // when r < 128; otherwise r >> 4 yields a small value in [8, 15].
+    const uint8_t r = Rand8();
+    return r < 128 ? r << 4 : r >> 4;
+  }
+
    int PseudoUniform(int range) {
      return (rand() >> 8) % range;
    }
diff --git a/test/convolve_test.cc b/test/convolve_test.cc

index 35065a41f755a30589e91bb1094db2b2e85dbb5d..a8139cbd304c67a0d5e17232e20b695d9e64d45f 100644 (file)
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -66,7 +66,7 @@ static void filter_block2d_8_c(const uint8_t *src_ptr,
    // support.
    const int kInterp_Extend = 4;
    const unsigned int intermediate_height =
-    (kInterp_Extend - 1) +     output_height + kInterp_Extend;
+    (kInterp_Extend - 1) + output_height + kInterp_Extend;
  
    /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
     * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
@@ -75,7 +75,7 @@ static void filter_block2d_8_c(const uint8_t *src_ptr,
     *                               = 23
     * and filter_max_width = 16
     */
-  uint8_t intermediate_buffer[23 * 16];
+  uint8_t intermediate_buffer[71 * 64];
    const int intermediate_next_stride = 1 - intermediate_height * output_width;
  
    // Horizontal pass (src -> transposed intermediate).
@@ -158,13 +158,13 @@ static void filter_average_block2d_8_c(const uint8_t *src_ptr,
                                         unsigned int dst_stride,
                                         unsigned int output_width,
                                         unsigned int output_height) {
-  uint8_t tmp[16*16];
+  uint8_t tmp[64*64];
  
-  assert(output_width <= 16);
-  assert(output_height <= 16);
-  filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 16,
+  assert(output_width <= 64);
+  assert(output_height <= 64);
+  filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
                       output_width, output_height);
-  block2d_average_c(tmp, 16, dst_ptr, dst_stride,
+  block2d_average_c(tmp, 64, dst_ptr, dst_stride,
                      output_width, output_height);
  }
  
@@ -188,10 +188,10 @@ class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {
  
    protected:
      static const int kDataAlignment = 16;
-    static const int kOuterBlockSize = 32;
+    static const int kOuterBlockSize = 128;
      static const int kInputStride = kOuterBlockSize;
      static const int kOutputStride = kOuterBlockSize;
-    static const int kMaxDimension = 16;
+    static const int kMaxDimension = 64;
  
      int Width() const { return GET_PARAM(0); }
      int Height() const { return GET_PARAM(1); }
@@ -221,7 +221,7 @@ class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {
  
        ::libvpx_test::ACMRandom prng;
        for (int i = 0; i < kOuterBlockSize * kOuterBlockSize; ++i)
-        input_[i] = prng.Rand8();
+        input_[i] = prng.Rand8Extremes();
      }
  
      void CheckGuardBlocks() {
@@ -308,6 +308,29 @@ const int16_t (*kTestFilterList[])[8] = {
    vp9_sub_pel_filters_8s,
    vp9_sub_pel_filters_8lp
  };
+const int kNumFilterBanks = sizeof(kTestFilterList) /
+    sizeof(kTestFilterList[0]);
+const int kNumFilters = 16;
+
+TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
+  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
+    const int16_t (*filters)[8] = kTestFilterList[filter_bank];
+    for (int i = 0; i < kNumFilters; i++) {
+      const int p0 = filters[i][0] + filters[i][1];  // sums of adjacent taps
+      const int p1 = filters[i][2] + filters[i][3];
+      const int p2 = filters[i][4] + filters[i][5];
+      const int p3 = filters[i][6] + filters[i][7];
+      EXPECT_LE(p0, 128);  // NOTE(review): presumably 255 * 128 must fit int16
+      EXPECT_LE(p1, 128);
+      EXPECT_LE(p2, 128);
+      EXPECT_LE(p3, 128);
+      EXPECT_LE(p0 + p3, 128);  // running sums accumulated as p0, p3, p1, p2
+      EXPECT_LE(p0 + p3 + p1, 128);
+      EXPECT_LE(p0 + p3 + p1 + p2, 128);
+      EXPECT_EQ(p0 + p1 + p2 + p3, 128);  // all eight taps total exactly 128
+    }
+  }
+}
  
  const int16_t kInvalidFilter[8] = { 0 };
  
@@ -316,12 +339,9 @@ TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
    uint8_t* const out = output();
    uint8_t ref[kOutputStride * kMaxDimension];
  
-  const int kNumFilterBanks = sizeof(kTestFilterList) /
-      sizeof(kTestFilterList[0]);
  
    for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
      const int16_t (*filters)[8] = kTestFilterList[filter_bank];
-    const int kNumFilters = 16;
  
      for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
        for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
@@ -368,7 +388,7 @@ TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
    ::libvpx_test::ACMRandom prng;
    for (int y = 0; y < Height(); ++y) {
      for (int x = 0; x < Width(); ++x) {
-      const uint8_t r = prng.Rand8();
+      const uint8_t r = prng.Rand8Extremes();
  
        out[y * kOutputStride + x] = r;
        ref[y * kOutputStride + x] = r;
@@ -440,16 +460,17 @@ DECLARE_ALIGNED(256, const int16_t, kChangeFilters[16][8]) = {
  TEST_P(ConvolveTest, ChangeFilterWorks) {
    uint8_t* const in = input();
    uint8_t* const out = output();
+  const int kPixelSelected = 4;
  
    REGISTER_STATE_CHECK(UUT_->h8_(in, kInputStride, out, kOutputStride,
                                   kChangeFilters[8], 17, kChangeFilters[4], 16,
                                   Width(), Height()));
  
    for (int x = 0; x < Width(); ++x) {
-    if (x < 8)
-      ASSERT_EQ(in[4], out[x]) << "x == " << x;
-    else
-      ASSERT_EQ(in[12], out[x]) << "x == " << x;
+    const int kQ4StepAdjust = x >> 4;
+    const int kFilterPeriodAdjust = (x >> 3) << 3;
+    const int ref_x = kQ4StepAdjust + kFilterPeriodAdjust + kPixelSelected;
+    ASSERT_EQ(in[ref_x], out[x]) << "x == " << x;
    }
  
    REGISTER_STATE_CHECK(UUT_->v8_(in, kInputStride, out, kOutputStride,
@@ -457,10 +478,10 @@ TEST_P(ConvolveTest, ChangeFilterWorks) {
                                   Width(), Height()));
  
    for (int y = 0; y < Height(); ++y) {
-    if (y < 8)
-      ASSERT_EQ(in[4 * kInputStride], out[y * kOutputStride]) << "y == " << y;
-    else
-      ASSERT_EQ(in[12 * kInputStride], out[y * kOutputStride]) << "y == " << y;
+    const int kQ4StepAdjust = y >> 4;
+    const int kFilterPeriodAdjust = (y >> 3) << 3;
+    const int ref_y = kQ4StepAdjust + kFilterPeriodAdjust + kPixelSelected;
+    ASSERT_EQ(in[ref_y * kInputStride], out[y * kInputStride]) << "y == " << y;
    }
  
    REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
@@ -468,9 +489,13 @@ TEST_P(ConvolveTest, ChangeFilterWorks) {
                                    Width(), Height()));
  
    for (int y = 0; y < Height(); ++y) {
+    const int kQ4StepAdjustY = y >> 4;
+    const int kFilterPeriodAdjustY = (y >> 3) << 3;
+    const int ref_y = kQ4StepAdjustY + kFilterPeriodAdjustY + kPixelSelected;
      for (int x = 0; x < Width(); ++x) {
-      const int ref_x = x < 8 ? 4 : 12;
-      const int ref_y = y < 8 ? 4 : 12;
+      const int kQ4StepAdjustX = x >> 4;
+      const int kFilterPeriodAdjustX = (x >> 3) << 3;
+      const int ref_x = kQ4StepAdjustX + kFilterPeriodAdjustX + kPixelSelected;
  
        ASSERT_EQ(in[ref_y * kInputStride + ref_x], out[y * kOutputStride + x])
            << "x == " << x << ", y == " << y;
@@ -489,9 +514,17 @@ const ConvolveFunctions convolve8_c(
  INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
      make_tuple(4, 4, &convolve8_c),
      make_tuple(8, 4, &convolve8_c),
+    make_tuple(4, 8, &convolve8_c),
      make_tuple(8, 8, &convolve8_c),
      make_tuple(16, 8, &convolve8_c),
-    make_tuple(16, 16, &convolve8_c)));
+    make_tuple(8, 16, &convolve8_c),
+    make_tuple(16, 16, &convolve8_c),
+    make_tuple(32, 16, &convolve8_c),
+    make_tuple(16, 32, &convolve8_c),
+    make_tuple(32, 32, &convolve8_c),
+    make_tuple(64, 32, &convolve8_c),
+    make_tuple(32, 64, &convolve8_c),
+    make_tuple(64, 64, &convolve8_c)));
  }
  
  #if HAVE_SSSE3
@@ -503,7 +536,15 @@ const ConvolveFunctions convolve8_ssse3(
  INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
      make_tuple(4, 4, &convolve8_ssse3),
      make_tuple(8, 4, &convolve8_ssse3),
+    make_tuple(4, 8, &convolve8_ssse3),
      make_tuple(8, 8, &convolve8_ssse3),
      make_tuple(16, 8, &convolve8_ssse3),
-    make_tuple(16, 16, &convolve8_ssse3)));
+    make_tuple(8, 16, &convolve8_ssse3),
+    make_tuple(16, 16, &convolve8_ssse3),
+    make_tuple(32, 16, &convolve8_ssse3),
+    make_tuple(16, 32, &convolve8_ssse3),
+    make_tuple(32, 32, &convolve8_ssse3),
+    make_tuple(64, 32, &convolve8_ssse3),
+    make_tuple(32, 64, &convolve8_ssse3),
+    make_tuple(64, 64, &convolve8_ssse3)));
  #endif
diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c

index 3ab8bec7a162d0b16c107dbf1b5dac9a67ed85bb..a27ca6f5da88d57808856a3feaf98baad979aa13 100644 (file)
--- a/vp9/common/vp9_convolve.c
+++ b/vp9/common/vp9_convolve.c
@@ -331,14 +331,14 @@ static void convolve_c(const uint8_t *src, int src_stride,
                         const int16_t *filter_y, int y_step_q4,
                         int w, int h, int taps) {
    /* Fixed size intermediate buffer places limits on parameters.
-   * Maximum intermediate_height is 39, for y_step_q4 == 32,
-   * h == 16, taps == 8.
+   * Maximum intermediate_height is 135, for y_step_q4 == 32,
+   * h == 64, taps == 8.
     */
-  uint8_t temp[16 * 39];
+  uint8_t temp[64 * 135];
    int intermediate_height = ((h * y_step_q4) >> 4) + taps - 1;
  
-  assert(w <= 16);
-  assert(h <= 16);
+  assert(w <= 64);
+  assert(h <= 64);
    assert(taps <= 8);
    assert(y_step_q4 <= 32);
  
@@ -346,10 +346,10 @@ static void convolve_c(const uint8_t *src, int src_stride,
      intermediate_height = h;
  
    convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,
-                   temp, 16,
+                   temp, 64,
                     filter_x, x_step_q4, filter_y, y_step_q4,
                     w, intermediate_height, taps);
-  convolve_vert_c(temp + 16 * (taps / 2 - 1), 16, dst, dst_stride,
+  convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride,
                    filter_x, x_step_q4, filter_y, y_step_q4,
                    w, h, taps);
  }
@@ -360,14 +360,14 @@ static void convolve_avg_c(const uint8_t *src, int src_stride,
                             const int16_t *filter_y, int y_step_q4,
                             int w, int h, int taps) {
    /* Fixed size intermediate buffer places limits on parameters.
-   * Maximum intermediate_height is 39, for y_step_q4 == 32,
-   * h == 16, taps == 8.
+   * Maximum intermediate_height is 135, for y_step_q4 == 32,
+   * h == 64, taps == 8.
     */
-  uint8_t temp[16 * 39];
+  uint8_t temp[64 * 135];
    int intermediate_height = ((h * y_step_q4) >> 4) + taps - 1;
  
-  assert(w <= 16);
-  assert(h <= 16);
+  assert(w <= 64);
+  assert(h <= 64);
    assert(taps <= 8);
    assert(y_step_q4 <= 32);
  
@@ -375,10 +375,10 @@ static void convolve_avg_c(const uint8_t *src, int src_stride,
      intermediate_height = h;
  
    convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,
-                   temp, 16,
+                   temp, 64,
                     filter_x, x_step_q4, filter_y, y_step_q4,
                     w, intermediate_height, taps);
-  convolve_avg_vert_c(temp + 16 * (taps / 2 - 1), 16, dst, dst_stride,
+  convolve_avg_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride,
                        filter_x, x_step_q4, filter_y, y_step_q4,
                        w, h, taps);
  }
@@ -563,16 +563,16 @@ void vp9_convolve8_avg_c(const uint8_t *src, int src_stride,
                           const int16_t *filter_y, int y_step_q4,
                           int w, int h) {
    /* Fixed size intermediate buffer places limits on parameters. */
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 16 * 16);
-  assert(w <= 16);
-  assert(h <= 16);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64);
+  assert(w <= 64);
+  assert(h <= 64);
  
    vp9_convolve8(src, src_stride,
-                temp, 16,
+                temp, 64,
                  filter_x, x_step_q4,
                  filter_y, y_step_q4,
                  w, h);
-  vp9_convolve_avg(temp, 16,
+  vp9_convolve_avg(temp, 64,
                     dst, dst_stride,
                     NULL, 0, /* These unused parameter should be removed! */
                     NULL, 0, /* These unused parameter should be removed! */
diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c

index 6d3bb021a7d31ddad0ac8cdce3067abb01121146..310f8ed24d177a44536e24e6a5130e0cc061e30f 100644 (file)
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -278,11 +278,9 @@ void vp9_convolve8_ssse3(const uint8_t *src, int src_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4,
                           int w, int h) {
-  DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*23);
+  DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*71);
  
-  // check w/h due to fixed size fdata2 array
-  assert(w <= 16);
-  assert(h <= 16);
+  assert(h <= 64);
  
    if (x_step_q4 == 16 && y_step_q4 == 16 &&
        filter_x[3] != 128 && filter_y[3] != 128) {
@@ -324,11 +322,9 @@ void vp9_convolve8_avg_ssse3(const uint8_t *src, int src_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4,
                           int w, int h) {
-  DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*23);
+  DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 16*71);
  
-  // check w/h due to fixed size fdata2 array
-  assert(w <= 16);
-  assert(h <= 16);
+  assert(h <= 64);
  
    if (x_step_q4 == 16 && y_step_q4 == 16 &&
        filter_x[3] != 128 && filter_y[3] != 128) {
diff --git a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm

index 32f00e2893dade99ee9085866fee80cbc39b893c..bbf9888caf67ba8e8414b2de7879788095c623c4 100644 (file)
--- a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
+++ b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
@@ -81,10 +81,10 @@
      pmaddubsw   xmm4, k4k5
      pmaddubsw   xmm6, k6k7
  
+    paddsw      xmm0, xmm6
      paddsw      xmm0, xmm2
-    paddsw      xmm0, krd
-    paddsw      xmm4, xmm6
      paddsw      xmm0, xmm4
+    paddsw      xmm0, krd
  
      psraw       xmm0, 7
      packuswb    xmm0, xmm0
@@ -165,10 +165,10 @@
      pmaddubsw   xmm4, k4k5
      pmaddubsw   xmm6, k6k7
  
+    paddsw      xmm0, xmm6
      paddsw      xmm0, xmm2
-    paddsw      xmm0, krd
-    paddsw      xmm4, xmm6
      paddsw      xmm0, xmm4
+    paddsw      xmm0, krd
  
      psraw       xmm0, 7
      packuswb    xmm0, xmm0
@@ -250,10 +250,10 @@
      pmaddubsw   xmm4, k4k5
      pmaddubsw   xmm6, k6k7
  
+    paddsw      xmm0, xmm6
      paddsw      xmm0, xmm2
-    paddsw      xmm0, krd
-    paddsw      xmm4, xmm6
      paddsw      xmm0, xmm4
+    paddsw      xmm0, krd
  
      psraw       xmm0, 7
      packuswb    xmm0, xmm0
@@ -285,10 +285,10 @@
      pmaddubsw   xmm4, k4k5
      pmaddubsw   xmm6, k6k7
  
+    paddsw      xmm0, xmm6
      paddsw      xmm0, xmm2
-    paddsw      xmm4, xmm6
-    paddsw      xmm0, krd
      paddsw      xmm0, xmm4
+    paddsw      xmm0, krd
  
      psraw       xmm0, 7
      packuswb    xmm0, xmm0
author	John Koleszar <jkoleszar@google.com>
	Thu, 18 Apr 2013 20:05:38 +0000 (13:05 -0700)
committer	John Koleszar <jkoleszar@google.com>
	Thu, 18 Apr 2013 20:57:59 +0000 (13:57 -0700)
test/acm_random.h		patch \| blob \| history
test/convolve_test.cc		patch \| blob \| history
vp9/common/vp9_convolve.c		patch \| blob \| history
vp9/common/x86/vp9_asm_stubs.c		patch \| blob \| history
vp9/common/x86/vp9_subpixel_8t_ssse3.asm		patch \| blob \| history