split vpx_lpf_horizontal_16 in two

author James Zern <jzern@google.com>

Sat, 13 Feb 2016 01:42:34 +0000 (17:42 -0800)

committer James Zern <jzern@google.com>

Wed, 17 Feb 2016 06:57:45 +0000 (22:57 -0800)
author James Zern <jzern@google.com>
Sat, 13 Feb 2016 01:42:34 +0000 (17:42 -0800)
committer James Zern <jzern@google.com>
Wed, 17 Feb 2016 06:57:45 +0000 (22:57 -0800)
diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc

index 0d898bc8fec2fc37cb46978b3d6d2082dc1b5fdb..9ddbf71cbe108165e87efc13c3d144614eb6635e 100644 (file)
--- a/test/lpf_8_test.cc
+++ b/test/lpf_8_test.cc
@@ -523,8 +523,10 @@ INSTANTIATE_TEST_CASE_P(
      ::testing::Values(
          make_tuple(&wrapper_nc<vpx_lpf_horizontal_8_sse2>,
                     &wrapper_nc<vpx_lpf_horizontal_8_c>, 8, 1),
-        make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 1),
-        make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 2),
+        make_tuple(&wrapper_nc<vpx_lpf_horizontal_edge_8_sse2>,
+                   &wrapper_nc<vpx_lpf_horizontal_edge_8_c>, 8, 1),
+        make_tuple(&wrapper_nc<vpx_lpf_horizontal_edge_16_sse2>,
+                   &wrapper_nc<vpx_lpf_horizontal_edge_16_c>, 8, 1),
          make_tuple(&wrapper_nc<vpx_lpf_vertical_8_sse2>,
                     &wrapper_nc<vpx_lpf_vertical_8_c>, 8, 1),
          make_tuple(&wrapper_nc<vpx_lpf_vertical_16_sse2>,
@@ -538,9 +540,10 @@ INSTANTIATE_TEST_CASE_P(
  INSTANTIATE_TEST_CASE_P(
      AVX2, Loop8Test6Param,
      ::testing::Values(
-        make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8, 1),
-        make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8,
-                   2)));
+        make_tuple(&wrapper_nc<vpx_lpf_horizontal_edge_8_avx2>,
+                   &wrapper_nc<vpx_lpf_horizontal_edge_8_c>, 8, 1),
+        make_tuple(&wrapper_nc<vpx_lpf_horizontal_edge_16_avx2>,
+                   &wrapper_nc<vpx_lpf_horizontal_edge_16_c>, 8, 1)));
  #endif
  
  #if HAVE_SSE2
@@ -597,10 +600,10 @@ INSTANTIATE_TEST_CASE_P(
  #if HAVE_NEON_ASM
  // Using #if inside the macro is unsupported on MSVS but the tests are not
  // currently built for MSVS with ARM and NEON.
-        make_tuple(&vpx_lpf_horizontal_16_neon,
-                   &vpx_lpf_horizontal_16_c, 8, 1),
-        make_tuple(&vpx_lpf_horizontal_16_neon,
-                   &vpx_lpf_horizontal_16_c, 8, 2),
+        make_tuple(&wrapper_nc<vpx_lpf_horizontal_edge_8_neon>,
+                   &wrapper_nc<vpx_lpf_horizontal_edge_8_c>, 8, 1),
+        make_tuple(&wrapper_nc<vpx_lpf_horizontal_edge_16_neon>,
+                   &wrapper_nc<vpx_lpf_horizontal_edge_16_c>, 8, 1),
          make_tuple(&wrapper_nc<vpx_lpf_vertical_16_neon>,
                     &wrapper_nc<vpx_lpf_vertical_16_c>, 8, 1),
          make_tuple(&wrapper_nc<vpx_lpf_vertical_16_dual_neon>,
@@ -638,10 +641,10 @@ INSTANTIATE_TEST_CASE_P(
                     &wrapper_nc<vpx_lpf_horizontal_4_c>, 8, 1),
          make_tuple(&wrapper_nc<vpx_lpf_horizontal_8_dspr2>,
                     &wrapper_nc<vpx_lpf_horizontal_8_c>, 8, 1),
-        make_tuple(&vpx_lpf_horizontal_16_dspr2,
-                   &vpx_lpf_horizontal_16_c, 8, 1),
-        make_tuple(&vpx_lpf_horizontal_16_dspr2,
-                   &vpx_lpf_horizontal_16_c, 8, 2),
+        make_tuple(&wrapper_nc<vpx_lpf_horizontal_edge_8_dspr2>,
+                   &wrapper_nc<vpx_lpf_horizontal_edge_8_c>, 8, 1),
+        make_tuple(&wrapper_nc<vpx_lpf_horizontal_edge_16_dspr2>,
+                   &wrapper_nc<vpx_lpf_horizontal_edge_16_c>, 8, 1),
          make_tuple(&wrapper_nc<vpx_lpf_vertical_4_dspr2>,
                     &wrapper_nc<vpx_lpf_vertical_4_c>, 8, 1),
          make_tuple(&wrapper_nc<vpx_lpf_vertical_8_dspr2>,
@@ -672,8 +675,10 @@ INSTANTIATE_TEST_CASE_P(
                     &wrapper_nc<vpx_lpf_horizontal_4_c>, 8, 1),
          make_tuple(&wrapper_nc<vpx_lpf_horizontal_8_msa>,
                     &wrapper_nc<vpx_lpf_horizontal_8_c>, 8, 1),
-        make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1),
-        make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2),
+        make_tuple(&wrapper_nc<vpx_lpf_horizontal_edge_8_msa>,
+                   &wrapper_nc<vpx_lpf_horizontal_edge_8_c>, 8, 1),
+        make_tuple(&wrapper_nc<vpx_lpf_horizontal_edge_16_msa>,
+                   &wrapper_nc<vpx_lpf_horizontal_edge_16_c>, 8, 1),
          make_tuple(&wrapper_nc<vpx_lpf_vertical_4_msa>,
                     &wrapper_nc<vpx_lpf_vertical_4_c>, 8, 1),
          make_tuple(&wrapper_nc<vpx_lpf_vertical_8_msa>,
diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c

index a659aaea416d4e40b2e543672b05c392114af647..3fe316db88f71b695132222950017db22fa4473e 100644 (file)
--- a/vp10/common/loopfilter.c
+++ b/vp10/common/loopfilter.c
@@ -512,12 +512,12 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
      if (mask & 1) {
        if (mask_16x16 & 1) {
          if ((mask_16x16 & 3) == 3) {
-          vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
-                                lfi->hev_thr, 2);
+          vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
+                                     lfi->hev_thr);
            count = 2;
          } else {
-          vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
-                                lfi->hev_thr, 1);
+          vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
+                                    lfi->hev_thr);
          }
        } else if (mask_8x8 & 1) {
          if ((mask_8x8 & 3) == 3) {
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c

index 18420eff81e066c647e98de3dcf4b9be3612041c..aae0a33fa9754f3e880a1d503ad6e2fa57d62beb 100644 (file)
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -512,12 +512,12 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
      if (mask & 1) {
        if (mask_16x16 & 1) {
          if ((mask_16x16 & 3) == 3) {
-          vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
-                                lfi->hev_thr, 2);
+          vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
+                                     lfi->hev_thr);
            count = 2;
          } else {
-          vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
-                                lfi->hev_thr, 1);
+          vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
+                                    lfi->hev_thr);
          }
        } else if (mask_8x8 & 1) {
          if ((mask_8x8 & 3) == 3) {
diff --git a/vpx_dsp/arm/loopfilter_mb_neon.asm b/vpx_dsp/arm/loopfilter_mb_neon.asm

index 20d9cfb1133ba81b7499bc5df5425f5218b600fd..d5da7a8409c9f0500d382847e192582bca81eb78 100644 (file)
--- a/vpx_dsp/arm/loopfilter_mb_neon.asm
+++ b/vpx_dsp/arm/loopfilter_mb_neon.asm
@@ -8,27 +8,28 @@
  ;  be found in the AUTHORS file in the root of the source tree.
  ;
  
-    EXPORT  |vpx_lpf_horizontal_16_neon|
+    EXPORT  |vpx_lpf_horizontal_edge_8_neon|
+    EXPORT  |vpx_lpf_horizontal_edge_16_neon|
      EXPORT  |vpx_lpf_vertical_16_neon|
      ARM
  
      AREA ||.text||, CODE, READONLY, ALIGN=2
  
-; void vpx_lpf_horizontal_16_neon(uint8_t *s, int p,
-;                                 const uint8_t *blimit,
-;                                 const uint8_t *limit,
-;                                 const uint8_t *thresh
-;                                 int count)
+; void mb_lpf_horizontal_edge(uint8_t *s, int p,
+;                             const uint8_t *blimit,
+;                             const uint8_t *limit,
+;                             const uint8_t *thresh,
+;                             int count)
  ; r0    uint8_t *s,
  ; r1    int p, /* pitch */
  ; r2    const uint8_t *blimit,
  ; r3    const uint8_t *limit,
  ; sp    const uint8_t *thresh,
-|vpx_lpf_horizontal_16_neon| PROC
+; r12   int count
+|mb_lpf_horizontal_edge| PROC
      push        {r4-r8, lr}
      vpush       {d8-d15}
      ldr         r4, [sp, #88]              ; load thresh
-    ldr         r12, [sp, #92]             ; load count
  
  h_count
      vld1.8      {d16[]}, [r2]              ; load *blimit
@@ -115,7 +116,35 @@ h_next
      vpop        {d8-d15}
      pop         {r4-r8, pc}
  
-    ENDP        ; |vpx_lpf_horizontal_16_neon|
+    ENDP        ; |mb_lpf_horizontal_edge|
+
+; void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch,
+;                                     const uint8_t *blimit,
+;                                     const uint8_t *limit,
+;                                     const uint8_t *thresh)
+; r0    uint8_t *s,
+; r1    int pitch,
+; r2    const uint8_t *blimit,
+; r3    const uint8_t *limit,
+; sp    const uint8_t *thresh
+|vpx_lpf_horizontal_edge_8_neon| PROC
+    mov r12, #1
+    b mb_lpf_horizontal_edge
+    ENDP        ; |vpx_lpf_horizontal_edge_8_neon|
+
+; void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch,
+;                                      const uint8_t *blimit,
+;                                      const uint8_t *limit,
+;                                      const uint8_t *thresh)
+; r0    uint8_t *s,
+; r1    int pitch,
+; r2    const uint8_t *blimit,
+; r3    const uint8_t *limit,
+; sp    const uint8_t *thresh
+|vpx_lpf_horizontal_edge_16_neon| PROC
+    mov r12, #2
+    b mb_lpf_horizontal_edge
+    ENDP        ; |vpx_lpf_horizontal_edge_16_neon|
  
  ; void vpx_lpf_vertical_16_neon(uint8_t *s, int p,
  ;                               const uint8_t *blimit,
diff --git a/vpx_dsp/loopfilter.c b/vpx_dsp/loopfilter.c

index 0ca1d9d8c0757ee64f4b9790f26890f7fa4b25d3..f866a3dcf7d03ef7e54058be83d3b0b093f30cab 100644 (file)
--- a/vpx_dsp/loopfilter.c
+++ b/vpx_dsp/loopfilter.c
@@ -289,9 +289,9 @@ static INLINE void filter16(int8_t mask, uint8_t thresh,
    }
  }
  
-void vpx_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit,
-                             const uint8_t *limit, const uint8_t *thresh,
-                             int count) {
+static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit,
+                                     const uint8_t *limit,
+                                     const uint8_t *thresh, int count) {
    int i;
  
    // loop filter designed to work using chars so that we can make maximum use
@@ -315,6 +315,16 @@ void vpx_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit,
    }
  }
  
+void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int p, const uint8_t *blimit,
+                                 const uint8_t *limit, const uint8_t *thresh) {
+  mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1);
+}
+
+void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int p, const uint8_t *blimit,
+                                  const uint8_t *limit, const uint8_t *thresh) {
+  mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2);
+}
+
  static void mb_lpf_vertical_edge_w(uint8_t *s, int p,
                                     const uint8_t *blimit,
                                     const uint8_t *limit,
diff --git a/vpx_dsp/mips/loopfilter_16_msa.c b/vpx_dsp/mips/loopfilter_16_msa.c

index b7c9f7bd0e95acfd0bc7813fbc26ea3cfac22f99..a6c581d72147dcf526754e7ea1997567c0b0f288 100644 (file)
--- a/vpx_dsp/mips/loopfilter_16_msa.c
+++ b/vpx_dsp/mips/loopfilter_16_msa.c
@@ -423,11 +423,11 @@ void vpx_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch,
    }
  }
  
-void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
-                               const uint8_t *b_limit_ptr,
-                               const uint8_t *limit_ptr,
-                               const uint8_t *thresh_ptr,
-                               int32_t count) {
+static void mb_lpf_horizontal_edge(uint8_t *src, int32_t pitch,
+                                   const uint8_t *b_limit_ptr,
+                                   const uint8_t *limit_ptr,
+                                   const uint8_t *thresh_ptr,
+                                   int32_t count) {
    if (1 == count) {
      uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
      uint64_t dword0, dword1;
@@ -648,6 +648,20 @@ void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
    }
  }
  
+void vpx_lpf_horizontal_edge_8_msa(uint8_t *src, int32_t pitch,
+                                   const uint8_t *b_limit_ptr,
+                                   const uint8_t *limit_ptr,
+                                   const uint8_t *thresh_ptr) {
+  mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 1);
+}
+
+void vpx_lpf_horizontal_edge_16_msa(uint8_t *src, int32_t pitch,
+                                    const uint8_t *b_limit_ptr,
+                                    const uint8_t *limit_ptr,
+                                    const uint8_t *thresh_ptr) {
+  mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 2);
+}
+
  static void transpose_16x8_to_8x16(uint8_t *input, int32_t in_pitch,
                                     uint8_t *output, int32_t out_pitch) {
    v16u8 p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org, p0_org;
diff --git a/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c b/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c

index 8a48650738b3d97b132b640f6d503c8bc3fde283..85e167ca054fd1a374a0f5ef27be64903ba6fa96 100644 (file)
--- a/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c
+++ b/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c
@@ -19,12 +19,12 @@
  #include "vpx_mem/vpx_mem.h"
  
  #if HAVE_DSPR2
-void vpx_lpf_horizontal_16_dspr2(unsigned char *s,
-                                 int pitch,
-                                 const uint8_t *blimit,
-                                 const uint8_t *limit,
-                                 const uint8_t *thresh,
-                                 int count) {
+static void mb_lpf_horizontal_edge(unsigned char *s,
+                                   int pitch,
+                                   const uint8_t *blimit,
+                                   const uint8_t *limit,
+                                   const uint8_t *thresh,
+                                   int count) {
    uint32_t  mask;
    uint32_t  hev, flat, flat2;
    uint8_t   i;
@@ -791,4 +791,18 @@ void vpx_lpf_horizontal_16_dspr2(unsigned char *s,
      s = s + 4;
    }
  }
+
+void vpx_lpf_horizontal_edge_8_dspr2(unsigned char *s, int pitch,
+                                     const uint8_t *blimit,
+                                     const uint8_t *limit,
+                                     const uint8_t *thresh) {
+  mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 1);
+}
+
+void vpx_lpf_horizontal_edge_16_dspr2(unsigned char *s, int pitch,
+                                      const uint8_t *blimit,
+                                      const uint8_t *limit,
+                                      const uint8_t *thresh) {
+  mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 2);
+}
  #endif  // #if HAVE_DSPR2
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl

index 17f11eb893e253b147cbd88686e3ad2e44affcb3..557b4c55f03f8c159c063bb95fc8165b27b0c669 100644 (file)
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -548,9 +548,13 @@ specialize qw/vpx_lpf_vertical_4 mmx neon dspr2 msa/;
  add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
  specialize qw/vpx_lpf_vertical_4_dual sse2 neon dspr2 msa/;
  
-add_proto qw/void vpx_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vpx_lpf_horizontal_16 sse2 avx2 neon_asm dspr2 msa/;
-$vpx_lpf_horizontal_16_neon_asm=vpx_lpf_horizontal_16_neon;
+add_proto qw/void vpx_lpf_horizontal_edge_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
+specialize qw/vpx_lpf_horizontal_edge_8 sse2 avx2 neon_asm dspr2 msa/;
+$vpx_lpf_horizontal_edge_8_neon_asm=vpx_lpf_horizontal_edge_8_neon;
+
+add_proto qw/void vpx_lpf_horizontal_edge_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
+specialize qw/vpx_lpf_horizontal_edge_16 sse2 avx2 neon_asm dspr2 msa/;
+$vpx_lpf_horizontal_edge_16_neon_asm=vpx_lpf_horizontal_edge_16_neon;
  
  add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
  specialize qw/vpx_lpf_horizontal_8 sse2 neon dspr2 msa/;
diff --git a/vpx_dsp/x86/loopfilter_avx2.c b/vpx_dsp/x86/loopfilter_avx2.c

index 23a97dd05f78ca1007bcffafdfaf6c56571c76f5..be1087c1e951a195ad867f2945853e4211dc8389 100644 (file)
--- a/vpx_dsp/x86/loopfilter_avx2.c
+++ b/vpx_dsp/x86/loopfilter_avx2.c
@@ -13,9 +13,10 @@
  #include "./vpx_dsp_rtcd.h"
  #include "vpx_ports/mem.h"
  
-static void mb_lpf_horizontal_edge_w_avx2_8(unsigned char *s, int p,
-        const unsigned char *_blimit, const unsigned char *_limit,
-        const unsigned char *_thresh) {
+void vpx_lpf_horizontal_edge_8_avx2(unsigned char *s, int p,
+                                    const unsigned char *_blimit,
+                                    const unsigned char *_limit,
+                                    const unsigned char *_thresh) {
      __m128i mask, hev, flat, flat2;
      const __m128i zero = _mm_set1_epi16(0);
      const __m128i one = _mm_set1_epi8(1);
@@ -400,9 +401,10 @@ DECLARE_ALIGNED(32, static const uint8_t, filt_loopfilter_avx2[32]) = {
    8, 128, 9, 128, 10, 128, 11, 128, 12, 128, 13, 128, 14, 128, 15, 128
  };
  
-static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p,
-        const unsigned char *_blimit, const unsigned char *_limit,
-        const unsigned char *_thresh) {
+void vpx_lpf_horizontal_edge_16_avx2(unsigned char *s, int p,
+                                     const unsigned char *_blimit,
+                                     const unsigned char *_limit,
+                                     const unsigned char *_thresh) {
      __m128i mask, hev, flat, flat2;
      const __m128i zero = _mm_set1_epi16(0);
      const __m128i one = _mm_set1_epi8(1);
@@ -975,12 +977,3 @@ static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p,
          _mm_storeu_si128((__m128i *) (s + 6 * p), q6);
      }
  }
-
-void vpx_lpf_horizontal_16_avx2(unsigned char *s, int p,
-        const unsigned char *_blimit, const unsigned char *_limit,
-        const unsigned char *_thresh, int count) {
-    if (count == 1)
-        mb_lpf_horizontal_edge_w_avx2_8(s, p, _blimit, _limit, _thresh);
-    else
-        mb_lpf_horizontal_edge_w_avx2_16(s, p, _blimit, _limit, _thresh);
-}
diff --git a/vpx_dsp/x86/loopfilter_sse2.c b/vpx_dsp/x86/loopfilter_sse2.c

index e1236dc4d4082d1e5c31f2621093af3775246b28..e03508a03a161a6da4fa7acab7f794fc6c672cbd 100644 (file)
--- a/vpx_dsp/x86/loopfilter_sse2.c
+++ b/vpx_dsp/x86/loopfilter_sse2.c
@@ -18,11 +18,10 @@ static INLINE __m128i abs_diff(__m128i a, __m128i b) {
    return _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a));
  }
  
-static void mb_lpf_horizontal_edge_w_sse2_8(unsigned char *s,
-                                            int p,
-                                            const unsigned char *_blimit,
-                                            const unsigned char *_limit,
-                                            const unsigned char *_thresh) {
+void vpx_lpf_horizontal_edge_8_sse2(unsigned char *s, int p,
+                                    const unsigned char *_blimit,
+                                    const unsigned char *_limit,
+                                    const unsigned char *_thresh) {
    const __m128i zero = _mm_set1_epi16(0);
    const __m128i one = _mm_set1_epi8(1);
    const __m128i blimit = _mm_load_si128((const __m128i *)_blimit);
@@ -383,11 +382,10 @@ static INLINE __m128i filter16_mask(const __m128i *const flat,
    return _mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result);
  }
  
-static void mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s,
-                                             int p,
-                                             const unsigned char *_blimit,
-                                             const unsigned char *_limit,
-                                             const unsigned char *_thresh) {
+void vpx_lpf_horizontal_edge_16_sse2(unsigned char *s, int p,
+                                     const unsigned char *_blimit,
+                                     const unsigned char *_limit,
+                                     const unsigned char *_thresh) {
    const __m128i zero = _mm_set1_epi16(0);
    const __m128i one = _mm_set1_epi8(1);
    const __m128i blimit = _mm_load_si128((const __m128i *)_blimit);
@@ -716,17 +714,6 @@ static void mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s,
    }
  }
  
-// TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly.
-void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p,
-                                const unsigned char *_blimit,
-                                const unsigned char *_limit,
-                                const unsigned char *_thresh, int count) {
-  if (count == 1)
-    mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh);
-  else
-    mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh);
-}
-
  void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p,
                                 const unsigned char *_blimit,
                                 const unsigned char *_limit,
@@ -1554,7 +1541,7 @@ void vpx_lpf_vertical_16_sse2(unsigned char *s, int p,
    transpose(src, p, dst, 8, 2);
  
    // Loop filtering
-  mb_lpf_horizontal_edge_w_sse2_8(t_dst + 8 * 8, 8, blimit, limit, thresh);
+  vpx_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh);
  
    src[0] = t_dst;
    src[1] = t_dst + 8 * 8;
@@ -1575,8 +1562,7 @@ void vpx_lpf_vertical_16_dual_sse2(unsigned char *s, int p,
    transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16);
  
    // Loop filtering
-  mb_lpf_horizontal_edge_w_sse2_16(t_dst + 8 * 16, 16, blimit, limit,
-                                   thresh);
+  vpx_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh);
  
    // Transpose back
    transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p);
author	James Zern <jzern@google.com>
	Sat, 13 Feb 2016 01:42:34 +0000 (17:42 -0800)
committer	James Zern <jzern@google.com>
	Wed, 17 Feb 2016 06:57:45 +0000 (22:57 -0800)
test/lpf_8_test.cc		patch \| blob \| history
vp10/common/loopfilter.c		patch \| blob \| history
vp9/common/vp9_loopfilter.c		patch \| blob \| history
vpx_dsp/arm/loopfilter_mb_neon.asm		patch \| blob \| history
vpx_dsp/loopfilter.c		patch \| blob \| history
vpx_dsp/mips/loopfilter_16_msa.c		patch \| blob \| history
vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c		patch \| blob \| history
vpx_dsp/vpx_dsp_rtcd_defs.pl		patch \| blob \| history
vpx_dsp/x86/loopfilter_avx2.c		patch \| blob \| history
vpx_dsp/x86/loopfilter_sse2.c		patch \| blob \| history