vpx_lpf_horizontal_4: remove unused count param

author James Zern <jzern@google.com>
Fri, 12 Feb 2016 04:26:54 +0000 (20:26 -0800)
committer James Zern <jzern@google.com>
Wed, 17 Feb 2016 06:57:27 +0000 (22:57 -0800)
diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc

index 394360e2fb9ed0f65d26be06028768a9f50fe6b5..e6fe1e508aefffa05b1ac9170177a484efabbf5f 100644 (file)
--- a/test/lpf_8_test.cc
+++ b/test/lpf_8_test.cc
@@ -458,7 +458,8 @@ using std::tr1::make_tuple;
  INSTANTIATE_TEST_CASE_P(
      MMX, Loop8Test6Param,
      ::testing::Values(
-        make_tuple(&vpx_lpf_horizontal_4_mmx, &vpx_lpf_horizontal_4_c, 8, 1),
+        make_tuple(&wrapper_nc<vpx_lpf_horizontal_4_mmx>,
+                   &wrapper_nc<vpx_lpf_horizontal_4_c>, 8, 1),
          make_tuple(&wrapper_nc<vpx_lpf_vertical_4_mmx>,
                     &wrapper_nc<vpx_lpf_vertical_4_c>, 8, 1)));
  #endif  // HAVE_MMX
@@ -609,8 +610,8 @@ INSTANTIATE_TEST_CASE_P(
                     &wrapper_nc<vpx_lpf_horizontal_8_c>, 8, 1),
          make_tuple(&wrapper_nc<vpx_lpf_vertical_8_neon>,
                     &wrapper_nc<vpx_lpf_vertical_8_c>, 8, 1),
-        make_tuple(&vpx_lpf_horizontal_4_neon,
-                   &vpx_lpf_horizontal_4_c, 8, 1),
+        make_tuple(&wrapper_nc<vpx_lpf_horizontal_4_neon>,
+                   &wrapper_nc<vpx_lpf_horizontal_4_c>, 8, 1),
          make_tuple(&wrapper_nc<vpx_lpf_vertical_4_neon>,
                     &wrapper_nc<vpx_lpf_vertical_4_c>, 8, 1)));
  INSTANTIATE_TEST_CASE_P(
@@ -633,7 +634,8 @@ INSTANTIATE_TEST_CASE_P(
  INSTANTIATE_TEST_CASE_P(
      DSPR2, Loop8Test6Param,
      ::testing::Values(
-        make_tuple(&vpx_lpf_horizontal_4_dspr2, &vpx_lpf_horizontal_4_c, 8, 1),
+        make_tuple(&wrapper_nc<vpx_lpf_horizontal_4_dspr2>,
+                   &wrapper_nc<vpx_lpf_horizontal_4_c>, 8, 1),
          make_tuple(&wrapper_nc<vpx_lpf_horizontal_8_dspr2>,
                     &wrapper_nc<vpx_lpf_horizontal_8_c>, 8, 1),
          make_tuple(&vpx_lpf_horizontal_16_dspr2,
@@ -666,7 +668,8 @@ INSTANTIATE_TEST_CASE_P(
  INSTANTIATE_TEST_CASE_P(
      MSA, Loop8Test6Param,
      ::testing::Values(
-        make_tuple(&vpx_lpf_horizontal_4_msa, &vpx_lpf_horizontal_4_c, 8, 1),
+        make_tuple(&wrapper_nc<vpx_lpf_horizontal_4_msa>,
+                   &wrapper_nc<vpx_lpf_horizontal_4_c>, 8, 1),
          make_tuple(&wrapper_nc<vpx_lpf_horizontal_8_msa>,
                     &wrapper_nc<vpx_lpf_horizontal_8_c>, 8, 1),
          make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1),
diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c

index 6bbf191ac9ad5a410c5ecfba706b11302f02c757..1f7ce981f5f313559d4909cc2f1d69242435344f 100644 (file)
--- a/vp10/common/loopfilter.c
+++ b/vp10/common/loopfilter.c
@@ -535,10 +535,10 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
            } else {
              if (mask_4x4_int & 1)
                vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
-                                   lfi->hev_thr, 1);
+                                   lfi->hev_thr);
              else if (mask_4x4_int & 2)
                vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
-                                   lfin->lim, lfin->hev_thr, 1);
+                                   lfin->lim, lfin->hev_thr);
            }
            count = 2;
          } else {
@@ -546,7 +546,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
  
            if (mask_4x4_int & 1)
              vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
-                                 lfi->hev_thr, 1);
+                                 lfi->hev_thr);
          }
        } else if (mask_4x4 & 1) {
          if ((mask_4x4 & 3) == 3) {
@@ -563,22 +563,22 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
            } else {
              if (mask_4x4_int & 1)
                vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
-                                   lfi->hev_thr, 1);
+                                   lfi->hev_thr);
              else if (mask_4x4_int & 2)
                vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
-                                   lfin->lim, lfin->hev_thr, 1);
+                                   lfin->lim, lfin->hev_thr);
            }
            count = 2;
          } else {
-          vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+          vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
  
            if (mask_4x4_int & 1)
              vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
-                                 lfi->hev_thr, 1);
+                                 lfi->hev_thr);
          }
        } else if (mask_4x4_int & 1) {
          vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
-                             lfi->hev_thr, 1);
+                             lfi->hev_thr);
        }
      }
      s += 8 * count;
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c

index d5431c2c202e788d2a3473dc36fdc337b2e8876c..7cc833e1930ac2fb036c8cf5213d87e6185b2fe0 100644 (file)
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -535,10 +535,10 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
            } else {
              if (mask_4x4_int & 1)
                vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
-                                   lfi->hev_thr, 1);
+                                   lfi->hev_thr);
              else if (mask_4x4_int & 2)
                vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
-                                   lfin->lim, lfin->hev_thr, 1);
+                                   lfin->lim, lfin->hev_thr);
            }
            count = 2;
          } else {
@@ -546,7 +546,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
  
            if (mask_4x4_int & 1)
              vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
-                                 lfi->hev_thr, 1);
+                                 lfi->hev_thr);
          }
        } else if (mask_4x4 & 1) {
          if ((mask_4x4 & 3) == 3) {
@@ -563,22 +563,22 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
            } else {
              if (mask_4x4_int & 1)
                vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
-                                   lfi->hev_thr, 1);
+                                   lfi->hev_thr);
              else if (mask_4x4_int & 2)
                vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
-                                   lfin->lim, lfin->hev_thr, 1);
+                                   lfin->lim, lfin->hev_thr);
            }
            count = 2;
          } else {
-          vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+          vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
  
            if (mask_4x4_int & 1)
              vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
-                                 lfi->hev_thr, 1);
+                                 lfi->hev_thr);
          }
        } else if (mask_4x4_int & 1) {
          vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
-                             lfi->hev_thr, 1);
+                             lfi->hev_thr);
        }
      }
      s += 8 * count;
diff --git a/vpx_dsp/arm/loopfilter_4_neon.asm b/vpx_dsp/arm/loopfilter_4_neon.asm

index d794f552a57ed29fc0ba34389bc6aa6645617615..9371158984eec5ac185500921f65766838e197bd 100644 (file)
--- a/vpx_dsp/arm/loopfilter_4_neon.asm
+++ b/vpx_dsp/arm/loopfilter_4_neon.asm
@@ -16,37 +16,28 @@
  
  ; Currently vpx only works on iterations 8 at a time. The vp8 loop filter
  ; works on 16 iterations at a time.
-; TODO(fgalligan): See about removing the count code as this function is only
-; called with a count of 1.
  ;
  ; void vpx_lpf_horizontal_4_neon(uint8_t *s,
  ;                                int p /* pitch */,
  ;                                const uint8_t *blimit,
  ;                                const uint8_t *limit,
-;                                const uint8_t *thresh,
-;                                int count)
+;                                const uint8_t *thresh)
  ;
  ; r0    uint8_t *s,
  ; r1    int p, /* pitch */
  ; r2    const uint8_t *blimit,
  ; r3    const uint8_t *limit,
  ; sp    const uint8_t *thresh,
-; sp+4  int count
  |vpx_lpf_horizontal_4_neon| PROC
      push        {lr}
  
      vld1.8      {d0[]}, [r2]               ; duplicate *blimit
-    ldr         r12, [sp, #8]              ; load count
      ldr         r2, [sp, #4]               ; load thresh
      add         r1, r1, r1                 ; double pitch
  
-    cmp         r12, #0
-    beq         end_vpx_lf_h_edge
-
      vld1.8      {d1[]}, [r3]               ; duplicate *limit
      vld1.8      {d2[]}, [r2]               ; duplicate *thresh
  
-count_lf_h_loop
      sub         r2, r0, r1, lsl #1         ; move src pointer down by 4 lines
      add         r3, r2, r1, lsr #1         ; set to 3 lines down
  
@@ -69,11 +60,6 @@ count_lf_h_loop
      vst1.u8     {d6}, [r2@64], r1          ; store oq0
      vst1.u8     {d7}, [r3@64], r1          ; store oq1
  
-    add         r0, r0, #8
-    subs        r12, r12, #1
-    bne         count_lf_h_loop
-
-end_vpx_lf_h_edge
      pop         {pc}
      ENDP        ; |vpx_lpf_horizontal_4_neon|
  
diff --git a/vpx_dsp/arm/loopfilter_4_neon.c b/vpx_dsp/arm/loopfilter_4_neon.c

index db9ea6a9dccc2aae3257ad36a84ce89b426de32f..7f3ee70b94873aef38a4d37efe4909e732192f49 100644 (file)
--- a/vpx_dsp/arm/loopfilter_4_neon.c
+++ b/vpx_dsp/arm/loopfilter_4_neon.c
@@ -115,22 +115,18 @@ void vpx_lpf_horizontal_4_neon(
          int pitch,
          const uint8_t *blimit,
          const uint8_t *limit,
-        const uint8_t *thresh,
-        int count) {
+        const uint8_t *thresh) {
      int i;
      uint8_t *s, *psrc;
      uint8x8_t dblimit, dlimit, dthresh;
      uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8;
  
-    if (count == 0)  // end_vpx_lf_h_edge
-        return;
-
      dblimit = vld1_u8(blimit);
      dlimit = vld1_u8(limit);
      dthresh = vld1_u8(thresh);
  
      psrc = src - (pitch << 2);
-    for (i = 0; i < count; i++) {
+    for (i = 0; i < 1; i++) {
          s = psrc + i * 8;
  
          d3u8 = vld1_u8(s);
diff --git a/vpx_dsp/loopfilter.c b/vpx_dsp/loopfilter.c

index e8092d912698e0db855869f24c22f6ae48c2ea27..e545d36ab331a81bec3f10c2ff1979929ca30827 100644 (file)
--- a/vpx_dsp/loopfilter.c
+++ b/vpx_dsp/loopfilter.c
@@ -119,12 +119,12 @@ static INLINE void filter4(int8_t mask, uint8_t thresh, uint8_t *op1,
  
  void vpx_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
                              const uint8_t *blimit, const uint8_t *limit,
-                            const uint8_t *thresh, int count) {
+                            const uint8_t *thresh) {
    int i;
  
    // loop filter designed to work using chars so that we can make maximum use
    // of 8 bit simd instructions.
-  for (i = 0; i < 8 * count; ++i) {
+  for (i = 0; i < 8; ++i) {
      const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
      const uint8_t q0 = s[0 * p],  q1 = s[1 * p],  q2 = s[2 * p],  q3 = s[3 * p];
      const int8_t mask = filter_mask(*limit, *blimit,
@@ -138,8 +138,8 @@ void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
                                   const uint8_t *limit0, const uint8_t *thresh0,
                                   const uint8_t *blimit1, const uint8_t *limit1,
                                   const uint8_t *thresh1) {
-  vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1);
-  vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1);
+  vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0);
+  vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1);
  }
  
  void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
diff --git a/vpx_dsp/mips/loopfilter_4_msa.c b/vpx_dsp/mips/loopfilter_4_msa.c

index ebeaddd21bacf4ca22b3827f244d9a5e0e71cd70..936347031ea871fe7d69618d3de742609a00b286 100644 (file)
--- a/vpx_dsp/mips/loopfilter_4_msa.c
+++ b/vpx_dsp/mips/loopfilter_4_msa.c
@@ -13,14 +13,11 @@
  void vpx_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
                                const uint8_t *b_limit_ptr,
                                const uint8_t *limit_ptr,
-                              const uint8_t *thresh_ptr,
-                              int32_t count) {
+                              const uint8_t *thresh_ptr) {
    uint64_t p1_d, p0_d, q0_d, q1_d;
    v16u8 mask, hev, flat, thresh, b_limit, limit;
    v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p1_out, p0_out, q0_out, q1_out;
  
-  (void)count;
-
    /* load vector elements */
    LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
  
diff --git a/vpx_dsp/mips/loopfilter_filters_dspr2.c b/vpx_dsp/mips/loopfilter_filters_dspr2.c

index 8a24372cb322ea2adb7f1df82af38832a10aa025..8414b9ed53f840891913f388a66affe759dba73e 100644 (file)
--- a/vpx_dsp/mips/loopfilter_filters_dspr2.c
+++ b/vpx_dsp/mips/loopfilter_filters_dspr2.c
@@ -23,8 +23,7 @@ void vpx_lpf_horizontal_4_dspr2(unsigned char *s,
                                  int pitch,
                                  const uint8_t *blimit,
                                  const uint8_t *limit,
-                                const uint8_t *thresh,
-                                int count) {
+                                const uint8_t *thresh) {
    uint8_t   i;
    uint32_t  mask;
    uint32_t  hev;
@@ -312,8 +311,8 @@ void vpx_lpf_horizontal_4_dual_dspr2(uint8_t *s, int p /* pitch */,
                                       const uint8_t *blimit1,
                                       const uint8_t *limit1,
                                       const uint8_t *thresh1) {
-  vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
-  vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
+  vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0);
+  vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1);
  }
  
  void vpx_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */,
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl

index 3f63a5f62ff7ac366b2ded3f3dc13e6bcbaa81c9..36c89db8fbcb07276b86a67af8092bf9da41118d 100644 (file)
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -559,7 +559,7 @@ add_proto qw/void vpx_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint
  specialize qw/vpx_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
  $vpx_lpf_horizontal_8_dual_neon_asm=vpx_lpf_horizontal_8_dual_neon;
  
-add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
  specialize qw/vpx_lpf_horizontal_4 mmx neon dspr2 msa/;
  
  add_proto qw/void vpx_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
diff --git a/vpx_dsp/x86/loopfilter_mmx.asm b/vpx_dsp/x86/loopfilter_mmx.asm

index dee565ce04971d6e0d185e575620cdf61639ceea..15105e3ed7fc173e15c227dc3543c6ef83687a3a 100644 (file)
--- a/vpx_dsp/x86/loopfilter_mmx.asm
+++ b/vpx_dsp/x86/loopfilter_mmx.asm
@@ -18,14 +18,13 @@
  ;    int src_pixel_step,
  ;    const char *blimit,
  ;    const char *limit,
-;    const char *thresh,
-;    int  count
+;    const char *thresh
  ;)
  global sym(vpx_lpf_horizontal_4_mmx) PRIVATE
  sym(vpx_lpf_horizontal_4_mmx):
      push        rbp
      mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 6
+    SHADOW_ARGS_TO_STACK 5
      GET_GOT     rbx
      push        rsi
      push        rdi
@@ -39,8 +38,6 @@ sym(vpx_lpf_horizontal_4_mmx):
          mov         rsi, arg(0) ;src_ptr
          movsxd      rax, dword ptr arg(1) ;src_pixel_step     ; destination pitch?
  
-        movsxd      rcx, dword ptr arg(5) ;count
-.next8_h:
          mov         rdx, arg(3) ;limit
          movq        mm7, [rdx]
          mov         rdi, rsi              ; rdi points to row +1 for indirect addressing
@@ -208,11 +205,6 @@ sym(vpx_lpf_horizontal_4_mmx):
          pxor        mm7, [GLOBAL(t80)]    ; unoffset
          movq        [rdi], mm7            ; write back
  
-        add         rsi,8
-        neg         rax
-        dec         rcx
-        jnz         .next8_h
-
      add rsp, 32
      pop rsp
      ; begin epilog
author	James Zern <jzern@google.com>
	Fri, 12 Feb 2016 04:26:54 +0000 (20:26 -0800)
committer	James Zern <jzern@google.com>
	Wed, 17 Feb 2016 06:57:27 +0000 (22:57 -0800)
test/lpf_8_test.cc		patch | blob | history
vp10/common/loopfilter.c		patch | blob | history
vp9/common/vp9_loopfilter.c		patch | blob | history
vpx_dsp/arm/loopfilter_4_neon.asm		patch | blob | history
vpx_dsp/arm/loopfilter_4_neon.c		patch | blob | history
vpx_dsp/loopfilter.c		patch | blob | history
vpx_dsp/mips/loopfilter_4_msa.c		patch | blob | history
vpx_dsp/mips/loopfilter_filters_dspr2.c		patch | blob | history
vpx_dsp/vpx_dsp_rtcd_defs.pl		patch | blob | history
vpx_dsp/x86/loopfilter_mmx.asm		patch | blob | history