specialize qw/aom_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
if (aom_config("CONFIG_CLPF") eq "yes") {
- add_proto qw/void aom_clpf_block/, "const uint8_t *src, uint8_t *dst, int stride, int x0, int y0, int sizex, int sizey, int width, int height, unsigned int strength";
+ add_proto qw/void aom_clpf_block/, "const uint8_t *src, uint8_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, int width, int height, unsigned int strength";
specialize qw/aom_clpf_block sse2 ssse3 sse4_1 neon/;
add_proto qw/void aom_clpf_detect/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength";
specialize qw/aom_clpf_detect sse2 ssse3 sse4_1 neon/;
return (8 + delta - (delta < 0)) >> 4;
}
-void aom_clpf_block_c(const uint8_t *src, uint8_t *dst, int stride, int x0,
- int y0, int sizex, int sizey, int width, int height,
- unsigned int strength) {
+void aom_clpf_block_c(const uint8_t *src, uint8_t *dst, int sstride,
+ int dstride, int x0, int y0, int sizex, int sizey,
+ int width, int height, unsigned int strength) {
int x, y;
for (y = y0; y < y0 + sizey; y++) {
for (x = x0; x < x0 + sizex; x++) {
- int X = src[y * stride + x];
- int A = src[AOMMAX(0, y - 1) * stride + x];
- int B = src[y * stride + AOMMAX(0, x - 2)];
- int C = src[y * stride + AOMMAX(0, x - 1)];
- int D = src[y * stride + AOMMIN(width - 1, x + 1)];
- int E = src[y * stride + AOMMIN(width - 1, x + 2)];
- int F = src[AOMMIN(height - 1, y + 1) * stride + x];
+ int X = src[y * sstride + x];
+ int A = src[AOMMAX(0, y - 1) * sstride + x];
+ int B = src[y * sstride + AOMMAX(0, x - 2)];
+ int C = src[y * sstride + AOMMAX(0, x - 1)];
+ int D = src[y * sstride + AOMMIN(width - 1, x + 1)];
+ int E = src[y * sstride + AOMMIN(width - 1, x + 2)];
+ int F = src[AOMMIN(height - 1, y + 1) * sstride + x];
int delta;
delta = av1_clpf_sample(X, A, B, C, D, E, F, strength);
- dst[y * stride + x] = X + delta;
+ dst[y * dstride + x] = X + delta;
}
}
}
// Return number of filtered blocks
-int av1_clpf_frame(const YV12_BUFFER_CONFIG *dst, const YV12_BUFFER_CONFIG *rec,
- const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
- int enable_fb_flag, unsigned int strength,
+int av1_clpf_frame(const YV12_BUFFER_CONFIG *orig_dst,
+ const YV12_BUFFER_CONFIG *rec, const YV12_BUFFER_CONFIG *org,
+ AV1_COMMON *cm, int enable_fb_flag, unsigned int strength,
unsigned int fb_size_log2, uint8_t *blocks,
int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
const YV12_BUFFER_CONFIG *,
/* Constrained low-pass filter (CLPF) */
int c, k, l, m, n;
const int bs = MI_SIZE;
- int width = rec->y_crop_width;
- int height = rec->y_crop_height;
+ const int width = rec->y_crop_width;
+ const int height = rec->y_crop_height;
int xpos, ypos;
- int stride_y = rec->y_stride;
- int num_fb_hor = (width + (1 << fb_size_log2) - 1) >> fb_size_log2;
- int num_fb_ver = (height + (1 << fb_size_log2) - 1) >> fb_size_log2;
+ const int sstride = rec->y_stride;
+ int dstride = orig_dst->y_stride;
+ const int num_fb_hor = (width + (1 << fb_size_log2) - 1) >> fb_size_log2;
+ const int num_fb_ver = (height + (1 << fb_size_log2) - 1) >> fb_size_log2;
int block_index = 0;
+ uint8_t *cache = NULL;
+ uint8_t **cache_ptr = NULL;
+ uint8_t **cache_dst = NULL;
+ int cache_idx = 0;
+ const int cache_size = num_fb_hor << (2 * fb_size_log2);
+ const int cache_blocks = cache_size / (bs * bs);
+ YV12_BUFFER_CONFIG dst = *orig_dst;
+
+ // Make buffer space for in-place filtering
+ if (rec->y_buffer == dst.y_buffer) {
+ CHECK_MEM_ERROR(cm, cache, aom_malloc(cache_size));
+ CHECK_MEM_ERROR(cm, cache_ptr,
+ aom_malloc(cache_blocks * sizeof(*cache_ptr)));
+ CHECK_MEM_ERROR(cm, cache_dst,
+ aom_malloc(cache_blocks * sizeof(*cache_dst)));
+  memset(cache_ptr, 0, cache_blocks * sizeof(*cache_ptr));
+ dst.y_buffer = cache;
+ dstride = bs;
+ }
// Iterate over all filter blocks
for (k = 0; k < num_fb_ver; k++) {
for (l = 0; l < num_fb_hor; l++) {
int h, w;
int allskip = 1;
+ const int xoff = l << fb_size_log2;
+ const int yoff = k << fb_size_log2;
for (m = 0; allskip && m < (1 << fb_size_log2) / bs; m++) {
for (n = 0; allskip && n < (1 << fb_size_log2) / bs; n++) {
- xpos = (l << fb_size_log2) + n * bs;
- ypos = (k << fb_size_log2) + m * bs;
+ xpos = xoff + n * bs;
+ ypos = yoff + m * bs;
if (xpos < width && ypos < height) {
allskip &=
cm->mi_grid_visible[ypos / bs * cm->mi_stride + xpos / bs]
// Iterate over all smaller blocks inside the filter block
for (m = 0; m < (h + bs - 1) / bs; m++) {
for (n = 0; n < (w + bs - 1) / bs; n++) {
- xpos = (l << fb_size_log2) + n * bs;
- ypos = (k << fb_size_log2) + m * bs;
+ xpos = xoff + n * bs;
+ ypos = yoff + m * bs;
if (!cm->mi_grid_visible[ypos / bs * cm->mi_stride + xpos / bs]
- ->mbmi.skip) {
- // Not skip block, apply the filter
- aom_clpf_block(rec->y_buffer, dst->y_buffer, stride_y, xpos, ypos,
- bs, bs, width, height, strength);
+ ->mbmi.skip) { // Not skip block
+ // Temporary buffering needed if filtering in-place
+ if (cache) {
+ if (cache_ptr[cache_idx]) {
+ // Copy filtered block back into the frame
+ for (c = 0; c < bs; c++)
+ *(uint64_t *)(cache_dst[cache_idx] + c * sstride) =
+ *(uint64_t *)(cache_ptr[cache_idx] + c * bs);
+ }
+ cache_ptr[cache_idx] = cache + cache_idx * bs * bs;
+ dst.y_buffer = cache_ptr[cache_idx] - ypos * bs - xpos;
+ cache_dst[cache_idx] = rec->y_buffer + ypos * sstride + xpos;
+ if (++cache_idx >= cache_blocks) cache_idx = 0;
+ }
+
+ // Apply the filter
+ aom_clpf_block(rec->y_buffer, dst.y_buffer, sstride, dstride,
+ xpos, ypos, bs, bs, width, height, strength);
+
} else { // Skip block, copy instead
- for (c = 0; c < bs; c++)
- *(uint64_t *)(dst->y_buffer + (ypos + c) * stride_y + xpos) =
- *(uint64_t *)(rec->y_buffer + (ypos + c) * stride_y + xpos);
+ if (!cache)
+ for (c = 0; c < bs; c++)
+ *(uint64_t *)(dst.y_buffer + (ypos + c) * dstride + xpos) = *(
+ uint64_t *)(rec->y_buffer + (ypos + c) * sstride + xpos);
}
}
}
} else { // Entire filter block is skip, copy
- for (m = 0; m < h; m++)
- memcpy(dst->y_buffer + ((k << fb_size_log2) + m) * stride_y +
- (l << fb_size_log2),
- rec->y_buffer + ((k << fb_size_log2) + m) * stride_y +
- (l << fb_size_log2),
- w);
+ if (!cache)
+ for (m = 0; m < h; m++)
+ memcpy(dst.y_buffer + (yoff + m) * dstride + xoff,
+ rec->y_buffer + (yoff + m) * sstride + xoff, w);
}
block_index += !allskip; // Count number of blocks filtered
}
}
+ if (cache) {
+ // Copy remaining blocks into the frame
+ for (cache_idx = 0; cache_idx < cache_blocks && cache_ptr[cache_idx];
+ cache_idx++)
+ for (c = 0; c < bs; c++)
+ *(uint64_t *)(cache_dst[cache_idx] + c * sstride) =
+ *(uint64_t *)(cache_ptr[cache_idx] + c * bs);
+
+    aom_free(cache);
+    aom_free(cache_ptr);
+    aom_free(cache_dst);
+  }
+
return block_index;
}
int av1_clpf_maxbits(const AV1_COMMON *cm);
int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int b);
int av1_clpf_frame(const YV12_BUFFER_CONFIG *dst, const YV12_BUFFER_CONFIG *rec,
- const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
+ const YV12_BUFFER_CONFIG *org, AV1_COMMON *cm,
int enable_fb_flag, unsigned int strength,
unsigned int fb_size_log2, uint8_t *blocks,
int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
#include "./aom_dsp_rtcd.h"
-static void clpf_block(const uint8_t *src, uint8_t *dst, int stride, int x0,
- int y0, int sizey, int width, int height,
- unsigned int strength) {
- dst += x0 + y0 * stride;
- src += x0 + y0 * stride;
+static void clpf_block(const uint8_t *src, uint8_t *dst, int sstride,
+ int dstride, int x0, int y0, int sizey, int width,
+ int height, unsigned int strength) {
+ dst += x0 + y0 * dstride;
+ src += x0 + y0 * sstride;
{
int bottom = height - 2 - y0;
const v128 sp = v128_dup_8(strength);
for (y = 0; y < sizey; y += 2) {
const v64 l1 = v64_load_aligned(src);
- const v64 l2 = v64_load_aligned(src + stride);
+ const v64 l2 = v64_load_aligned(src + sstride);
v128 o = v128_from_v64(l1, l2);
const v128 x = v128_add_8(c128, o);
const v128 a = v128_add_8(
c128,
- v128_from_v64(v64_load_aligned(src - (y != -y0) * stride), l1));
+ v128_from_v64(v64_load_aligned(src - (y != -y0) * sstride), l1));
const v128 b = v128_shuffle_8(x, b_shuff);
const v128 c = v128_shuffle_8(x, c_shuff);
const v128 d = v128_add_8(
c128, v128_from_v64(v64_load_unaligned(src + 1),
- v64_load_unaligned(src + 1 + stride)));
+ v64_load_unaligned(src + 1 + sstride)));
const v128 e = v128_add_8(
c128, v128_from_v64(v64_load_unaligned(src + 2),
- v64_load_unaligned(src + 2 + stride)));
+ v64_load_unaligned(src + 2 + sstride)));
const v128 f = v128_add_8(
c128, v128_from_v64(l2, v64_load_aligned(
- src + ((y != bottom) + 1) * stride)));
+ src + ((y != bottom) + 1) * sstride)));
const v128 tmp =
v128_add_8(v128_max_s8(v128_min_s8(v128_ssub_s8(c, x), sp), sm),
delta, v128_zero()))),
4));
v64_store_aligned(dst, v128_high_v64(o));
- v64_store_aligned(dst + stride, v128_low_v64(o));
- src += stride * 2;
- dst += stride * 2;
+ v64_store_aligned(dst + dstride, v128_low_v64(o));
+ src += sstride * 2;
+ dst += dstride * 2;
}
} else if (!(width - x0 - 8)) { // Clip right
const v128 d_shuff = v128_from_v64(v64_from_64(0x0f0f0e0d0c0b0a09LL),
for (y = 0; y < sizey; y += 2) {
const v64 l1 = v64_load_aligned(src);
- const v64 l2 = v64_load_aligned(src + stride);
+ const v64 l2 = v64_load_aligned(src + sstride);
v128 o = v128_from_v64(l1, l2);
const v128 x = v128_add_8(c128, o);
const v128 a = v128_add_8(
c128,
- v128_from_v64(v64_load_aligned(src - (y != -y0) * stride), l1));
+ v128_from_v64(v64_load_aligned(src - (y != -y0) * sstride), l1));
const v128 b = v128_add_8(
c128, v128_from_v64(v64_load_unaligned(src - 2),
- v64_load_unaligned(src - 2 + stride)));
+ v64_load_unaligned(src - 2 + sstride)));
const v128 c = v128_add_8(
c128, v128_from_v64(v64_load_unaligned(src - 1),
- v64_load_unaligned(src - 1 + stride)));
+ v64_load_unaligned(src - 1 + sstride)));
const v128 d = v128_shuffle_8(x, d_shuff);
const v128 e = v128_shuffle_8(x, e_shuff);
const v128 f = v128_add_8(
c128, v128_from_v64(l2, v64_load_aligned(
- src + ((y != bottom) + 1) * stride)));
+ src + ((y != bottom) + 1) * sstride)));
const v128 tmp =
v128_add_8(v128_max_s8(v128_min_s8(v128_ssub_s8(c, x), sp), sm),
delta, v128_zero()))),
4));
v64_store_aligned(dst, v128_high_v64(o));
- v64_store_aligned(dst + stride, v128_low_v64(o));
- src += stride * 2;
- dst += stride * 2;
+ v64_store_aligned(dst + dstride, v128_low_v64(o));
+ src += sstride * 2;
+ dst += dstride * 2;
}
} else { // No left/right clipping
int y;
for (y = 0; y < sizey; y += 2) {
const v64 l1 = v64_load_aligned(src);
- const v64 l2 = v64_load_aligned(src + stride);
+ const v64 l2 = v64_load_aligned(src + sstride);
v128 o = v128_from_v64(l1, l2);
const v128 x = v128_add_8(c128, o);
const v128 a = v128_add_8(
c128,
- v128_from_v64(v64_load_aligned(src - (y != -y0) * stride), l1));
+ v128_from_v64(v64_load_aligned(src - (y != -y0) * sstride), l1));
const v128 b = v128_add_8(
c128, v128_from_v64(v64_load_unaligned(src - 2),
- v64_load_unaligned(src - 2 + stride)));
+ v64_load_unaligned(src - 2 + sstride)));
const v128 c = v128_add_8(
c128, v128_from_v64(v64_load_unaligned(src - 1),
- v64_load_unaligned(src - 1 + stride)));
+ v64_load_unaligned(src - 1 + sstride)));
const v128 d = v128_add_8(
c128, v128_from_v64(v64_load_unaligned(src + 1),
- v64_load_unaligned(src + 1 + stride)));
+ v64_load_unaligned(src + 1 + sstride)));
const v128 e = v128_add_8(
c128, v128_from_v64(v64_load_unaligned(src + 2),
- v64_load_unaligned(src + 2 + stride)));
+ v64_load_unaligned(src + 2 + sstride)));
const v128 f = v128_add_8(
c128, v128_from_v64(l2, v64_load_aligned(
- src + ((y != bottom) + 1) * stride)));
+ src + ((y != bottom) + 1) * sstride)));
const v128 tmp =
v128_add_8(v128_max_s8(v128_min_s8(v128_ssub_s8(c, x), sp), sm),
delta, v128_zero()))),
4));
v64_store_aligned(dst, v128_high_v64(o));
- v64_store_aligned(dst + stride, v128_low_v64(o));
- src += stride * 2;
- dst += stride * 2;
+ v64_store_aligned(dst + dstride, v128_low_v64(o));
+ src += sstride * 2;
+ dst += dstride * 2;
}
}
}
}
-void SIMD_FUNC(aom_clpf_block)(const uint8_t *src, uint8_t *dst, int stride,
- int x0, int y0, int sizex, int sizey, int width,
- int height, unsigned int strength) {
+void SIMD_FUNC(aom_clpf_block)(const uint8_t *src, uint8_t *dst, int sstride,
+ int dstride, int x0, int y0, int sizex,
+ int sizey, int width, int height,
+ unsigned int strength) {
// TODO(stemidts):
// A sizex different from 8 will only be needed if CLPF is extended to chroma.
// This will only be used if 4:2:0 and width not a multiple of 16 and along
// this case. If not extended to chroma, this test will be redundant.
if (sizex != 8 || width < 16 || y0 + 8 > height || x0 + 8 > width) {
// Fallback to C for odd sizes
- aom_clpf_block_c(src, dst, stride, x0, y0, sizex, sizey, width, height,
- strength);
+ aom_clpf_block_c(src, dst, sstride, dstride, x0, y0, sizex, sizey, width,
+ height, strength);
} else {
- clpf_block(src, dst, stride, x0, y0, sizey, width, height, strength);
+ clpf_block(src, dst, sstride, dstride, x0, y0, sizey, width, height,
+ strength);
}
}
#if CONFIG_CLPF
if (cm->clpf_strength && !cm->skip_loop_filter) {
- YV12_BUFFER_CONFIG dst; // Buffer for the result
-
- dst = pbi->cur_buf->buf;
- CHECK_MEM_ERROR(cm, dst.y_buffer, aom_malloc(dst.y_stride * dst.y_height));
-
- av1_clpf_frame(&dst, &pbi->cur_buf->buf, 0, cm, !!cm->clpf_size,
+ const YV12_BUFFER_CONFIG *const frame = &pbi->cur_buf->buf;
+ av1_clpf_frame(frame, frame, 0, cm, !!cm->clpf_size,
cm->clpf_strength + (cm->clpf_strength == 3),
4 + cm->clpf_size, cm->clpf_blocks, clpf_bit);
-
- // Copy result
- memcpy(pbi->cur_buf->buf.y_buffer, dst.y_buffer,
- dst.y_height * dst.y_stride);
- aom_free(dst.y_buffer);
}
if (cm->clpf_blocks) aom_free(cm->clpf_blocks);
#endif
namespace {
-typedef void (*clpf_block_t)(const uint8_t *src, uint8_t *dst, int stride,
- int x0, int y0, int sizex, int sizey, int width,
- int height, unsigned int strength);
+typedef void (*clpf_block_t)(const uint8_t *src, uint8_t *dst, int sstride,
+ int dstride, int x0, int y0, int sizex, int sizey,
+ int width, int height, unsigned int strength);
typedef std::tr1::tuple<clpf_block_t, clpf_block_t, int, int>
clpf_block_param_t;
for (ypos = 0; ypos < size && !error; ypos += h * !error) {
for (xpos = 0; xpos < size && !error; xpos += w * !error) {
for (strength = 0; strength < 3 && !error; strength += !error) {
- ref_clpf(s, ref_d, size, xpos, ypos, w, h, size, size,
+ ref_clpf(s, ref_d, size, size, xpos, ypos, w, h, size, size,
1 << strength);
- ASM_REGISTER_STATE_CHECK(
- clpf(s, d, size, xpos, ypos, w, h, size, size, 1 << strength));
+ ASM_REGISTER_STATE_CHECK(clpf(s, d, size, size, xpos, ypos, w, h,
+ size, size, 1 << strength));
for (pos = 0; pos < size * size && !error; pos++) {
error = ref_d[pos] != d[pos];
for (ypos = 0; ypos < size; ypos += h) {
for (xpos = 0; xpos < size; xpos += w) {
for (strength = 0; strength < 3; strength++) {
- ref_clpf(s, d, size, xpos, ypos, w, h, size, size, 1 << strength);
+ ref_clpf(s, d, size, size, xpos, ypos, w, h, size, size,
+ 1 << strength);
}
}
}
for (ypos = 0; ypos < size; ypos += h) {
for (xpos = 0; xpos < size; xpos += w) {
for (strength = 0; strength < 3; strength++) {
- clpf(s, d, size, xpos, ypos, w, h, size, size, 1 << strength);
+ clpf(s, d, size, size, xpos, ypos, w, h, size, size, 1 << strength);
}
}
}