From: Jim Bankoski <jimbankoski@google.com> Date: Wed, 18 Dec 2013 01:34:34 +0000 (-0800) Subject: rename loop filter functions X-Git-Tag: v1.4.0~2804^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b720ba165f20e3d2e5b06e5ee479075298718791;p=libvpx rename loop filter functions This renames all the loop filter functions so that they no longer refer to mb Change-Id: I8a58a8c7fd253d835cb619bde13913e896ece90b --- diff --git a/vp9/common/arm/neon/vp9_loopfilter_16_neon.asm b/vp9/common/arm/neon/vp9_loopfilter_16_neon.asm index 751bc74bc..5b8ec2028 100644 --- a/vp9/common/arm/neon/vp9_loopfilter_16_neon.asm +++ b/vp9/common/arm/neon/vp9_loopfilter_16_neon.asm @@ -8,18 +8,18 @@ ; be found in the AUTHORS file in the root of the source tree. ; - EXPORT |vp9_loop_filter_horizontal_edge_16_neon| + EXPORT |vp9_lpf_horizontal_4_dual_neon| ARM AREA ||.text||, CODE, READONLY, ALIGN=2 -;void vp9_loop_filter_horizontal_edge_16_neon(uint8_t *s, int p, -; const uint8_t *blimit0, -; const uint8_t *limit0, -; const uint8_t *thresh0, -; const uint8_t *blimit1, -; const uint8_t *limit1, -; const uint8_t *thresh1) +;void vp9_lpf_horizontal_4_dual_neon(uint8_t *s, int p, +; const uint8_t *blimit0, +; const uint8_t *limit0, +; const uint8_t *thresh0, +; const uint8_t *blimit1, +; const uint8_t *limit1, +; const uint8_t *thresh1) ; r0 uint8_t *s, ; r1 int p, ; r2 const uint8_t *blimit0, @@ -29,7 +29,7 @@ ; sp+8 const uint8_t *limit1, ; sp+12 const uint8_t *thresh1, -|vp9_loop_filter_horizontal_edge_16_neon| PROC +|vp9_lpf_horizontal_4_dual_neon| PROC push {lr} ldr r12, [sp, #4] ; load thresh0 @@ -76,7 +76,7 @@ vpop {d8-d15} ; restore neon registers pop {pc} - ENDP ; |vp9_loop_filter_horizontal_edge_16_neon| + ENDP ; |vp9_lpf_horizontal_4_dual_neon| ; void vp9_loop_filter_neon_16(); ; This is a helper function for the loopfilters. The invidual functions do the diff --git a/vp9/common/arm/neon/vp9_loopfilter_16_neon.c b/vp9/common/arm/neon/vp9_loopfilter_16_neon.c index b97e7aa4a..0820db247 100644 --- a/vp9/common/arm/neon/vp9_loopfilter_16_neon.c +++ b/vp9/common/arm/neon/vp9_loopfilter_16_neon.c @@ -10,44 +10,43 @@ #include "./vp9_rtcd.h" -void vp9_mbloop_filter_horizontal_edge_16_neon(uint8_t *s, int p /* pitch */, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - vp9_mbloop_filter_horizontal_edge(s, p, blimit0, limit0, thresh0, 1); - vp9_mbloop_filter_horizontal_edge(s + 8, p, blimit1, limit1, thresh1, 1); +void vp9_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + vp9_lpf_horizontal_8(s, p, blimit0, limit0, thresh0, 1); + vp9_lpf_horizontal_8(s + 8, p, blimit1, limit1, thresh1, 1); } -void vp9_loop_filter_vertical_edge_16_neon(uint8_t *s, int p, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - vp9_loop_filter_vertical_edge_neon(s, p, blimit0, limit0, thresh0, 1); - vp9_loop_filter_vertical_edge_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1); +void vp9_lpf_vertical_4_dual_neon(uint8_t *s, int p, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + vp9_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1); + vp9_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1); } -void vp9_mbloop_filter_vertical_edge_16_neon(uint8_t *s, int p, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - vp9_mbloop_filter_vertical_edge_neon(s, p, blimit0, limit0, thresh0, 1); - vp9_mbloop_filter_vertical_edge_neon(s + 8 * p, p, blimit1, limit1, thresh1, - 1); +void vp9_lpf_vertical_8_dual_neon(uint8_t *s, int p, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + vp9_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, 1); + vp9_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1); } -void vp9_mb_lpf_vertical_edge_w_16_neon(uint8_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh) { - vp9_mb_lpf_vertical_edge_w_neon(s, p, blimit, limit, thresh); - vp9_mb_lpf_vertical_edge_w_neon(s + 8 * p, p, blimit, limit, thresh); +void vp9_lpf_vertical_16_dual_neon(uint8_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh) { + vp9_lpf_vertical_16_neon(s, p, blimit, limit, thresh); + vp9_lpf_vertical_16_neon(s + 8 * p, p, blimit, limit, thresh); } diff --git a/vp9/common/arm/neon/vp9_loopfilter_neon.asm b/vp9/common/arm/neon/vp9_loopfilter_neon.asm index 8b4fe5dcc..443032217 100644 --- a/vp9/common/arm/neon/vp9_loopfilter_neon.asm +++ b/vp9/common/arm/neon/vp9_loopfilter_neon.asm @@ -8,10 +8,10 @@ ; be found in the AUTHORS file in the root of the source tree. ; - EXPORT |vp9_loop_filter_horizontal_edge_neon| - EXPORT |vp9_loop_filter_vertical_edge_neon| - EXPORT |vp9_mbloop_filter_horizontal_edge_neon| - EXPORT |vp9_mbloop_filter_vertical_edge_neon| + EXPORT |vp9_lpf_horizontal_4_neon| + EXPORT |vp9_lpf_vertical_4_neon| + EXPORT |vp9_lpf_horizontal_8_neon| + EXPORT |vp9_lpf_vertical_8_neon| ARM AREA ||.text||, CODE, READONLY, ALIGN=2 @@ -21,12 +21,12 @@ ; TODO(fgalligan): See about removing the count code as this function is only ; called with a count of 1. ; -; void vp9_loop_filter_horizontal_edge_neon(uint8_t *s, -; int p /* pitch */, -; const uint8_t *blimit, -; const uint8_t *limit, -; const uint8_t *thresh, -; int count) +; void vp9_lpf_horizontal_4_neon(uint8_t *s, +; int p /* pitch */, +; const uint8_t *blimit, +; const uint8_t *limit, +; const uint8_t *thresh, +; int count) ; ; r0 uint8_t *s, ; r1 int p, /* pitch */ @@ -34,7 +34,7 @@ ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, ; sp+4 int count -|vp9_loop_filter_horizontal_edge_neon| PROC +|vp9_lpf_horizontal_4_neon| PROC push {lr} vld1.8 {d0[]}, [r2] ; duplicate *blimit @@ -77,19 +77,19 @@ count_lf_h_loop end_vp9_lf_h_edge pop {pc} - ENDP ; |vp9_loop_filter_horizontal_edge_neon| + ENDP ; |vp9_lpf_horizontal_4_neon| ; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter ; works on 16 iterations at a time. ; TODO(fgalligan): See about removing the count code as this function is only ; called with a count of 1. ; -; void vp9_loop_filter_vertical_edge_neon(uint8_t *s, -; int p /* pitch */, -; const uint8_t *blimit, -; const uint8_t *limit, -; const uint8_t *thresh, -; int count) +; void vp9_lpf_vertical_4_neon(uint8_t *s, +; int p /* pitch */, +; const uint8_t *blimit, +; const uint8_t *limit, +; const uint8_t *thresh, +; int count) ; ; r0 uint8_t *s, ; r1 int p, /* pitch */ @@ -97,7 +97,7 @@ end_vp9_lf_h_edge ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, ; sp+4 int count -|vp9_loop_filter_vertical_edge_neon| PROC +|vp9_lpf_vertical_4_neon| PROC push {lr} vld1.8 {d0[]}, [r2] ; duplicate *blimit @@ -158,7 +158,7 @@ count_lf_v_loop end_vp9_lf_v_edge pop {pc} - ENDP ; |vp9_loop_filter_vertical_edge_neon| + ENDP ; |vp9_lpf_vertical_4_neon| ; void vp9_loop_filter_neon(); ; This is a helper function for the loopfilters. The invidual functions do the @@ -276,18 +276,18 @@ end_vp9_lf_v_edge bx lr ENDP ; |vp9_loop_filter_neon| -; void vp9_mbloop_filter_horizontal_edge_neon(uint8_t *s, int p, -; const uint8_t *blimit, -; const uint8_t *limit, -; const uint8_t *thresh, -; int count) +; void vp9_lpf_horizontal_8_neon(uint8_t *s, int p, +; const uint8_t *blimit, +; const uint8_t *limit, +; const uint8_t *thresh, +; int count) ; r0 uint8_t *s, ; r1 int p, /* pitch */ ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, ; sp+4 int count -|vp9_mbloop_filter_horizontal_edge_neon| PROC +|vp9_lpf_horizontal_8_neon| PROC push {r4-r5, lr} vld1.8 {d0[]}, [r2] ; duplicate *blimit @@ -333,14 +333,14 @@ count_mblf_h_loop end_vp9_mblf_h_edge pop {r4-r5, pc} - ENDP ; |vp9_mbloop_filter_horizontal_edge_neon| + ENDP ; |vp9_lpf_horizontal_8_neon| -; void vp9_mbloop_filter_vertical_edge_neon(uint8_t *s, -; int pitch, -; const uint8_t *blimit, -; const uint8_t *limit, -; const uint8_t *thresh, -; int count) +; void vp9_lpf_vertical_8_neon(uint8_t *s, +; int pitch, +; const uint8_t *blimit, +; const uint8_t *limit, +; const uint8_t *thresh, +; int count) ; ; r0 uint8_t *s, ; r1 int pitch, @@ -348,7 +348,7 @@ end_vp9_mblf_h_edge ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, ; sp+4 int count -|vp9_mbloop_filter_vertical_edge_neon| PROC +|vp9_lpf_vertical_8_neon| PROC push {r4-r5, lr} vld1.8 {d0[]}, [r2] ; duplicate *blimit @@ -420,7 +420,7 @@ count_mblf_v_loop end_vp9_mblf_v_edge pop {r4-r5, pc} - ENDP ; |vp9_mbloop_filter_vertical_edge_neon| + ENDP ; |vp9_lpf_vertical_8_neon| ; void vp9_mbloop_filter_neon(); ; This is a helper function for the loopfilters. The invidual functions do the diff --git a/vp9/common/arm/neon/vp9_mb_lpf_neon.asm b/vp9/common/arm/neon/vp9_mb_lpf_neon.asm index 2e8001b91..8cb913cb8 100644 --- a/vp9/common/arm/neon/vp9_mb_lpf_neon.asm +++ b/vp9/common/arm/neon/vp9_mb_lpf_neon.asm @@ -8,23 +8,23 @@ ; be found in the AUTHORS file in the root of the source tree. ; - EXPORT |vp9_mb_lpf_horizontal_edge_w_neon| - EXPORT |vp9_mb_lpf_vertical_edge_w_neon| + EXPORT |vp9_lpf_horizontal_16_neon| + EXPORT |vp9_lpf_vertical_16_neon| ARM AREA ||.text||, CODE, READONLY, ALIGN=2 -; void vp9_mb_lpf_horizontal_edge_w_neon(uint8_t *s, int p, -; const uint8_t *blimit, -; const uint8_t *limit, -; const uint8_t *thresh -; int count) +; void vp9_lpf_horizontal_16_neon(uint8_t *s, int p, +; const uint8_t *blimit, +; const uint8_t *limit, +; const uint8_t *thresh +; int count) ; r0 uint8_t *s, ; r1 int p, /* pitch */ ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, -|vp9_mb_lpf_horizontal_edge_w_neon| PROC +|vp9_lpf_horizontal_16_neon| PROC push {r4-r8, lr} vpush {d8-d15} ldr r4, [sp, #88] ; load thresh @@ -115,18 +115,18 @@ h_next vpop {d8-d15} pop {r4-r8, pc} - ENDP ; |vp9_mb_lpf_horizontal_edge_w_neon| + ENDP ; |vp9_lpf_horizontal_16_neon| -; void vp9_mb_lpf_vertical_edge_w_neon(uint8_t *s, int p, -; const uint8_t *blimit, -; const uint8_t *limit, -; const uint8_t *thresh) +; void vp9_lpf_vertical_16_neon(uint8_t *s, int p, +; const uint8_t *blimit, +; const uint8_t *limit, +; const uint8_t *thresh) ; r0 uint8_t *s, ; r1 int p, /* pitch */ ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, -|vp9_mb_lpf_vertical_edge_w_neon| PROC +|vp9_lpf_vertical_16_neon| PROC push {r4-r8, lr} vpush {d8-d15} ldr r4, [sp, #88] ; load thresh @@ -279,7 +279,7 @@ v_end vpop {d8-d15} pop {r4-r8, pc} - ENDP ; |vp9_mb_lpf_vertical_edge_w_neon| + ENDP ; |vp9_lpf_vertical_16_neon| ; void vp9_wide_mbfilter_neon(); ; This is a helper function for the loopfilters. The invidual functions do the diff --git a/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.c b/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.c index 0c0f155ae..3df7f4c9f 100644 --- a/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.c @@ -20,12 +20,12 @@ #include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h" #if HAVE_DSPR2 -void vp9_loop_filter_horizontal_edge_dspr2(unsigned char *s, - int pitch, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, - int count) { +void vp9_lpf_horizontal_4_dspr2(unsigned char *s, + int pitch, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, + int count) { uint8_t i; uint32_t mask; uint32_t hev; @@ -114,12 +114,12 @@ void vp9_loop_filter_horizontal_edge_dspr2(unsigned char *s, } } -void vp9_loop_filter_vertical_edge_dspr2(unsigned char *s, - int pitch, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, - int count) { +void vp9_lpf_vertical_4_dspr2(unsigned char *s, + int pitch, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, + int count) { uint8_t i; uint32_t mask, hev; uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; @@ -307,58 +307,56 @@ void vp9_loop_filter_vertical_edge_dspr2(unsigned char *s, } } -void vp9_loop_filter_horizontal_edge_16_dspr2(uint8_t *s, int p /* pitch */, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - vp9_loop_filter_horizontal_edge_dspr2(s, p, blimit0, limit0, thresh0, 1); - vp9_loop_filter_horizontal_edge_dspr2(s + 8, p, blimit1, limit1, thresh1, 1); +void vp9_lpf_horizontal_4_dual_dspr2(uint8_t *s, int p /* pitch */, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + vp9_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0, 1); + vp9_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1, 1); } -void vp9_mbloop_filter_horizontal_edge_16_dspr2(uint8_t *s, int p /* pitch */, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - vp9_mbloop_filter_horizontal_edge_dspr2(s, p, blimit0, limit0, thresh0, 1); - vp9_mbloop_filter_horizontal_edge_dspr2(s + 8, p, blimit1, limit1, thresh1, - 1); +void vp9_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + vp9_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0, 1); + vp9_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1, 1); } -void vp9_loop_filter_vertical_edge_16_dspr2(uint8_t *s, int p, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - vp9_loop_filter_vertical_edge_dspr2(s, p, blimit0, limit0, thresh0, 1); - vp9_loop_filter_vertical_edge_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, - 1); +void vp9_lpf_vertical_4_dual_dspr2(uint8_t *s, int p, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + vp9_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0, 1); + vp9_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1); } -void vp9_mbloop_filter_vertical_edge_16_dspr2(uint8_t *s, int p, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - vp9_mbloop_filter_vertical_edge_dspr2(s, p, blimit0, limit0, thresh0, 1); - vp9_mbloop_filter_vertical_edge_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, +void vp9_lpf_vertical_8_dual_dspr2(uint8_t *s, int p, + const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { + vp9_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0, 1); + vp9_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1); } -void vp9_mb_lpf_vertical_edge_w_16_dspr2(uint8_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh) { - vp9_mb_lpf_vertical_edge_w_dspr2(s, p, blimit, limit, thresh); - vp9_mb_lpf_vertical_edge_w_dspr2(s + 8 * p, p, blimit, limit, thresh); +void vp9_lpf_vertical_16_dual_dspr2(uint8_t *s, int p, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh) { + vp9_lpf_vertical_16_dspr2(s, p, blimit, limit, thresh); + vp9_lpf_vertical_16_dspr2(s + 8 * p, p, blimit, limit, thresh); } #endif // #if HAVE_DSPR2 diff --git a/vp9/common/mips/dspr2/vp9_mbloop_loopfilter_dspr2.c b/vp9/common/mips/dspr2/vp9_mbloop_loopfilter_dspr2.c index adfd755c1..7cd0b632b 100644 --- a/vp9/common/mips/dspr2/vp9_mbloop_loopfilter_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_mbloop_loopfilter_dspr2.c @@ -20,12 +20,12 @@ #include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h" #if HAVE_DSPR2 -void vp9_mbloop_filter_horizontal_edge_dspr2(unsigned char *s, - int pitch, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, - int count) { +void vp9_lpf_horizontal_8_dspr2(unsigned char *s, + int pitch, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, + int count) { uint32_t mask; uint32_t hev, flat; uint8_t i; @@ -319,12 +319,12 @@ void vp9_mbloop_filter_horizontal_edge_dspr2(unsigned char *s, } } -void vp9_mbloop_filter_vertical_edge_dspr2(unsigned char *s, - int pitch, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, - int count) { +void vp9_lpf_vertical_8_dspr2(unsigned char *s, + int pitch, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, + int count) { uint8_t i; uint32_t mask, hev, flat; uint8_t *s1, *s2, *s3, *s4; diff --git a/vp9/common/mips/dspr2/vp9_mblpf_horiz_loopfilter_dspr2.c b/vp9/common/mips/dspr2/vp9_mblpf_horiz_loopfilter_dspr2.c index 075975536..6c946742e 100644 --- a/vp9/common/mips/dspr2/vp9_mblpf_horiz_loopfilter_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_mblpf_horiz_loopfilter_dspr2.c @@ -20,12 +20,12 @@ #include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h" #if HAVE_DSPR2 -void vp9_mb_lpf_horizontal_edge_w_dspr2(unsigned char *s, - int pitch, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, - int count) { +void vp9_lpf_horizontal_16_dspr2(unsigned char *s, + int pitch, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh, + int count) { uint32_t mask; uint32_t hev, flat, flat2; uint8_t i; diff --git a/vp9/common/mips/dspr2/vp9_mblpf_vert_loopfilter_dspr2.c b/vp9/common/mips/dspr2/vp9_mblpf_vert_loopfilter_dspr2.c index 9e9171c4a..851fc6c2d 100644 --- a/vp9/common/mips/dspr2/vp9_mblpf_vert_loopfilter_dspr2.c +++ b/vp9/common/mips/dspr2/vp9_mblpf_vert_loopfilter_dspr2.c @@ -20,11 +20,11 @@ #include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h" #if HAVE_DSPR2 -void vp9_mb_lpf_vertical_edge_w_dspr2(uint8_t *s, - int pitch, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh) { +void vp9_lpf_vertical_16_dspr2(uint8_t *s, + int pitch, + const uint8_t *blimit, + const uint8_t *limit, + const uint8_t *thresh) { uint8_t i; uint32_t mask, hev, flat, flat2; uint8_t *s1, *s2, *s3, *s4; diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index 72adf928a..437ab5706 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -355,56 +355,56 @@ static void filter_selectively_vert_row2(PLANE_TYPE plane_type, if (mask & 1) { if ((mask_16x16_0 | mask_16x16_1) & 1) { if ((mask_16x16_0 & mask_16x16_1) & 1) { - vp9_mb_lpf_vertical_edge_w_16(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr); + vp9_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim, + lfi0->hev_thr); } else if (mask_16x16_0 & 1) { - vp9_mb_lpf_vertical_edge_w(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr); + vp9_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim, + lfi0->hev_thr); } else { - vp9_mb_lpf_vertical_edge_w(s + 8 *pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr); + vp9_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim, + lfi1->lim, lfi1->hev_thr); } } if ((mask_8x8_0 | mask_8x8_1) & 1) { if ((mask_8x8_0 & mask_8x8_1) & 1) { - vp9_mbloop_filter_vertical_edge_16(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, - lfi1->lim, lfi1->hev_thr); + vp9_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim, + lfi0->hev_thr, lfi1->mblim, lfi1->lim, + lfi1->hev_thr); } else if (mask_8x8_0 & 1) { - vp9_mbloop_filter_vertical_edge(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1); + vp9_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, + 1); } else { - vp9_mbloop_filter_vertical_edge(s + 8 *pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, 1); + vp9_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, + lfi1->hev_thr, 1); } } if ((mask_4x4_0 | mask_4x4_1) & 1) { if ((mask_4x4_0 & mask_4x4_1) & 1) { - vp9_loop_filter_vertical_edge_16(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, - lfi1->lim, lfi1->hev_thr); + vp9_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim, + lfi0->hev_thr, lfi1->mblim, lfi1->lim, + lfi1->hev_thr); } else if (mask_4x4_0 & 1) { - vp9_loop_filter_vertical_edge(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1); + vp9_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, + 1); } else { - vp9_loop_filter_vertical_edge(s + 8 *pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, 1); + vp9_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, + lfi1->hev_thr, 1); } } if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) { if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) { - vp9_loop_filter_vertical_edge_16(s + 4, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, - lfi1->lim, lfi1->hev_thr); + vp9_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim, + lfi0->hev_thr, lfi1->mblim, lfi1->lim, + lfi1->hev_thr); } else if (mask_4x4_int_0 & 1) { - vp9_loop_filter_vertical_edge(s + 4, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1); + vp9_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim, + lfi0->hev_thr, 1); } else { - vp9_loop_filter_vertical_edge(s + 8 *pitch + 4, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, 1); + vp9_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim, + lfi1->hev_thr, 1); } } } @@ -440,81 +440,73 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, if (mask & 1) { if (mask_16x16 & 1) { if ((mask_16x16 & 3) == 3) { - vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 2); + vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, 2); count = 2; } else { - vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, 1); } } else if (mask_8x8 & 1) { if ((mask_8x8 & 3) == 3) { // Next block's thresholds const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); - vp9_mbloop_filter_horizontal_edge_16(s, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, - lfin->mblim, lfin->lim, - lfin->hev_thr); + vp9_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, lfin->mblim, lfin->lim, + lfin->hev_thr); if ((mask_4x4_int & 3) == 3) { - vp9_loop_filter_horizontal_edge_16(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, - lfin->mblim, lfin->lim, - lfin->hev_thr); + vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, lfin->mblim, + lfin->lim, lfin->hev_thr); } else { if (mask_4x4_int & 1) - vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1); + vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, 1); else if (mask_4x4_int & 2) - vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch, - lfin->mblim, lfin->lim, - lfin->hev_thr, 1); + vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, + lfin->lim, lfin->hev_thr, 1); } count = 2; } else { - vp9_mbloop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + vp9_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); if (mask_4x4_int & 1) - vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1); + vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, 1); } } else if (mask_4x4 & 1) { if ((mask_4x4 & 3) == 3) { // Next block's thresholds const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); - vp9_loop_filter_horizontal_edge_16(s, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, - lfin->mblim, lfin->lim, - lfin->hev_thr); + vp9_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, lfin->mblim, lfin->lim, + lfin->hev_thr); if ((mask_4x4_int & 3) == 3) { - vp9_loop_filter_horizontal_edge_16(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, - lfin->mblim, lfin->lim, - lfin->hev_thr); + vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, lfin->mblim, + lfin->lim, lfin->hev_thr); } else { if (mask_4x4_int & 1) - vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1); + vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, 1); else if (mask_4x4_int & 2) - vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch, - lfin->mblim, lfin->lim, - lfin->hev_thr, 1); + vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, + lfin->lim, lfin->hev_thr, 1); } count = 2; } else { - vp9_loop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + vp9_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); - if (mask_4x4_int & 1) - vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1); + if (mask_4x4_int & 1) + vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, 1); } } else if (mask_4x4_int & 1) { - vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1); + vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, 1); } } s += 8 * count; @@ -940,19 +932,15 @@ static void filter_selectively_vert(uint8_t *s, int pitch, if (mask & 1) { if (mask_16x16 & 1) { - vp9_mb_lpf_vertical_edge_w(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr); + vp9_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } else if (mask_8x8 & 1) { - vp9_mbloop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + vp9_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } else if (mask_4x4 & 1) { - vp9_loop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + vp9_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } } if (mask_4x4_int & 1) - vp9_loop_filter_vertical_edge(s + 4, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + vp9_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); s += 8; lfl += 1; mask_16x16 >>= 1; diff --git a/vp9/common/vp9_loopfilter_filters.c b/vp9/common/vp9_loopfilter_filters.c index f2e910f5b..bbbad01e4 100644 --- a/vp9/common/vp9_loopfilter_filters.c +++ b/vp9/common/vp9_loopfilter_filters.c @@ -101,11 +101,9 @@ static INLINE void filter4(int8_t mask, uint8_t hev, uint8_t *op1, *op1 = signed_char_clamp(ps1 + filter) ^ 0x80; } -void vp9_loop_filter_horizontal_edge_c(uint8_t *s, int p /* pitch */, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, - int count) { +void vp9_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */, + const uint8_t *blimit, const uint8_t *limit, + const uint8_t *thresh, int count) { int i; // loop filter designed to work using chars so that we can make maximum use @@ -121,22 +119,17 @@ void vp9_loop_filter_horizontal_edge_c(uint8_t *s, int p /* pitch */, } } -void vp9_loop_filter_horizontal_edge_16_c(uint8_t *s, int p, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - vp9_loop_filter_horizontal_edge_c(s, p, blimit0, limit0, thresh0, 1); - vp9_loop_filter_horizontal_edge_c(s + 8, p, blimit1, limit1, thresh1, 1); +void vp9_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0, + const uint8_t *limit0, const uint8_t *thresh0, + const uint8_t *blimit1, const uint8_t *limit1, + const uint8_t *thresh1) { + vp9_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1); + vp9_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1); } -void vp9_loop_filter_vertical_edge_c(uint8_t *s, int pitch, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, - int count) { +void vp9_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh, + int count) { int i; // loop filter designed to work using chars so that we can make maximum use @@ -152,15 +145,12 @@ void vp9_loop_filter_vertical_edge_c(uint8_t *s, int pitch, } } -void vp9_loop_filter_vertical_edge_16_c(uint8_t *s, int pitch, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - vp9_loop_filter_vertical_edge_c(s, pitch, blimit0, limit0, thresh0, 1); - vp9_loop_filter_vertical_edge_c(s + 8 * pitch, pitch, blimit1, limit1, +void vp9_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, + const uint8_t *limit0, const uint8_t *thresh0, + const uint8_t *blimit1, const uint8_t *limit1, + const uint8_t *thresh1) { + vp9_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1); + vp9_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1, 1); } @@ -185,11 +175,9 @@ static INLINE void filter8(int8_t mask, uint8_t hev, uint8_t flat, } } -void vp9_mbloop_filter_horizontal_edge_c(uint8_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, - int count) { +void vp9_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh, + int count) { int i; // loop filter designed to work using chars so that we can make maximum use @@ -208,22 +196,17 @@ void vp9_mbloop_filter_horizontal_edge_c(uint8_t *s, int p, } } -void vp9_mbloop_filter_horizontal_edge_16_c(uint8_t *s, int p, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - vp9_mbloop_filter_horizontal_edge_c(s, p, blimit0, limit0, thresh0, 1); - vp9_mbloop_filter_horizontal_edge_c(s + 8, p, blimit1, limit1, thresh1, 1); +void vp9_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0, + const uint8_t *limit0, const uint8_t *thresh0, + const uint8_t *blimit1, const uint8_t *limit1, + const uint8_t *thresh1) { + vp9_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1); + vp9_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1); } -void vp9_mbloop_filter_vertical_edge_c(uint8_t *s, int pitch, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, - int count) { +void vp9_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh, + int count) { int i; for (i = 0; i < 8 * count; ++i) { @@ -239,15 +222,12 @@ void vp9_mbloop_filter_vertical_edge_c(uint8_t *s, int pitch, } } -void vp9_mbloop_filter_vertical_edge_16_c(uint8_t *s, int pitch, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { - vp9_mbloop_filter_vertical_edge_c(s, pitch, blimit0, limit0, thresh0, 1); - vp9_mbloop_filter_vertical_edge_c(s + 8 * pitch, pitch, blimit1, limit1, +void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, + const uint8_t *limit0, const uint8_t *thresh0, + const uint8_t *blimit1, const uint8_t *limit1, + const uint8_t *thresh1) { + vp9_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1); + vp9_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1, 1); } @@ -302,11 +282,9 @@ static INLINE void filter16(int8_t mask, uint8_t hev, } } -void vp9_mb_lpf_horizontal_edge_w_c(uint8_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh, - int count) { +void vp9_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh, + int count) { int i; // loop filter designed to work using chars so that we can make maximum use @@ -355,16 +333,12 @@ static void mb_lpf_vertical_edge_w(uint8_t *s, int p, } } -void vp9_mb_lpf_vertical_edge_w_c(uint8_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh) { +void vp9_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh) { mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8); } -void vp9_mb_lpf_vertical_edge_w_16_c(uint8_t *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh) { +void vp9_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit, + const uint8_t *limit, const uint8_t *thresh) { mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16); } diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 727f5c437..3025ed42c 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -191,38 +191,38 @@ specialize vp9_dc_128_predictor_32x32 # # Loopfilter # -prototype void vp9_mb_lpf_vertical_edge_w "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh" -specialize vp9_mb_lpf_vertical_edge_w sse2 neon dspr2 +prototype void vp9_lpf_vertical_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh" +specialize vp9_lpf_vertical_16 sse2 neon dspr2 -prototype void vp9_mb_lpf_vertical_edge_w_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh" -specialize vp9_mb_lpf_vertical_edge_w_16 sse2 neon dspr2 +prototype void vp9_lpf_vertical_16_dual "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh" +specialize vp9_lpf_vertical_16_dual sse2 neon dspr2 -prototype void vp9_mbloop_filter_vertical_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" -specialize vp9_mbloop_filter_vertical_edge sse2 neon dspr2 +prototype void vp9_lpf_vertical_8 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" +specialize vp9_lpf_vertical_8 sse2 neon dspr2 -prototype void vp9_mbloop_filter_vertical_edge_16 "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" -specialize vp9_mbloop_filter_vertical_edge_16 sse2 neon dspr2 +prototype void vp9_lpf_vertical_8_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" +specialize vp9_lpf_vertical_8_dual sse2 neon dspr2 -prototype void vp9_loop_filter_vertical_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" -specialize vp9_loop_filter_vertical_edge mmx neon dspr2 +prototype void vp9_lpf_vertical_4 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" +specialize vp9_lpf_vertical_4 mmx neon dspr2 -prototype void vp9_loop_filter_vertical_edge_16 "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" -specialize vp9_loop_filter_vertical_edge_16 sse2 neon dspr2 +prototype void vp9_lpf_vertical_4_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" +specialize vp9_lpf_vertical_4_dual sse2 neon dspr2 -prototype void vp9_mb_lpf_horizontal_edge_w "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" -specialize vp9_mb_lpf_horizontal_edge_w sse2 avx2 neon dspr2 +prototype void vp9_lpf_horizontal_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" +specialize vp9_lpf_horizontal_16 sse2 avx2 neon dspr2 -prototype void vp9_mbloop_filter_horizontal_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" -specialize vp9_mbloop_filter_horizontal_edge sse2 neon dspr2 +prototype void vp9_lpf_horizontal_8 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" +specialize vp9_lpf_horizontal_8 sse2 neon dspr2 -prototype void vp9_mbloop_filter_horizontal_edge_16 "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" -specialize vp9_mbloop_filter_horizontal_edge_16 sse2 neon dspr2 +prototype void vp9_lpf_horizontal_8_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" +specialize vp9_lpf_horizontal_8_dual sse2 neon dspr2 -prototype void vp9_loop_filter_horizontal_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" -specialize vp9_loop_filter_horizontal_edge mmx neon dspr2 +prototype void vp9_lpf_horizontal_4 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count" +specialize vp9_lpf_horizontal_4 mmx neon dspr2 -prototype void vp9_loop_filter_horizontal_edge_16 "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" -specialize vp9_loop_filter_horizontal_edge_16 sse2 neon dspr2 +prototype void vp9_lpf_horizontal_4_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1" +specialize vp9_lpf_horizontal_4_dual sse2 neon dspr2 # # post proc diff --git a/vp9/common/x86/vp9_loopfilter_intrin_avx2.c b/vp9/common/x86/vp9_loopfilter_intrin_avx2.c index 3c5cb8ffd..439c028f2 100644 --- a/vp9/common/x86/vp9_loopfilter_intrin_avx2.c +++ b/vp9/common/x86/vp9_loopfilter_intrin_avx2.c @@ -933,7 +933,7 @@ static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p, } } -void vp9_mb_lpf_horizontal_edge_w_avx2(unsigned char *s, int p, +void vp9_lpf_horizontal_16_avx2(unsigned char *s, int p, const unsigned char *_blimit, const unsigned char *_limit, const unsigned char *_thresh, int count) { if (count == 1) diff --git a/vp9/common/x86/vp9_loopfilter_intrin_sse2.c b/vp9/common/x86/vp9_loopfilter_intrin_sse2.c index 3ca55cfc3..448ad5af7 100644 --- a/vp9/common/x86/vp9_loopfilter_intrin_sse2.c +++ b/vp9/common/x86/vp9_loopfilter_intrin_sse2.c @@ -846,24 +846,20 @@ static void mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s, } // TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly. -void vp9_mb_lpf_horizontal_edge_w_sse2(unsigned char *s, - int p, - const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh, - int count) { +void vp9_lpf_horizontal_16_sse2(unsigned char *s, int p, + const unsigned char *_blimit, + const unsigned char *_limit, + const unsigned char *_thresh, int count) { if (count == 1) mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh); else mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh); } -void vp9_mbloop_filter_horizontal_edge_sse2(unsigned char *s, - int p, - const unsigned char *_blimit, - const unsigned char *_limit, - const unsigned char *_thresh, - int count) { +void vp9_lpf_horizontal_8_sse2(unsigned char *s, int p, + const unsigned char *_blimit, + const unsigned char *_limit, + const unsigned char *_thresh, int count) { DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op2, 16); DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op1, 16); DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op0, 16); @@ -1083,13 +1079,13 @@ void vp9_mbloop_filter_horizontal_edge_sse2(unsigned char *s, } } -void vp9_mbloop_filter_horizontal_edge_16_sse2(uint8_t *s, int p, - const uint8_t *_blimit0, - const uint8_t *_limit0, - const uint8_t *_thresh0, - const uint8_t *_blimit1, - const uint8_t *_limit1, - const uint8_t *_thresh1) { +void vp9_lpf_horizontal_8_dual_sse2(uint8_t *s, int p, + const uint8_t *_blimit0, + const uint8_t *_limit0, + const uint8_t *_thresh0, + const uint8_t *_blimit1, + const uint8_t *_limit1, + const uint8_t *_thresh1) { DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op2, 16); DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op1, 16); DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op0, 16); @@ -1330,14 +1326,13 @@ void vp9_mbloop_filter_horizontal_edge_16_sse2(uint8_t *s, int p, } } -void vp9_loop_filter_horizontal_edge_16_sse2(unsigned char *s, - int p, - const unsigned char *_blimit0, - const unsigned char *_limit0, - const unsigned char *_thresh0, - const unsigned char *_blimit1, - const unsigned char *_limit1, - const unsigned char *_thresh1) { +void vp9_lpf_horizontal_4_dual_sse2(unsigned char *s, int p, + const unsigned char *_blimit0, + const unsigned char *_limit0, + const unsigned char *_thresh0, + const unsigned char *_blimit1, + const unsigned char *_limit1, + const unsigned char *_thresh1) { const __m128i blimit = _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_blimit0), _mm_load_si128((const __m128i *)_blimit1)); @@ -1598,13 +1593,12 @@ static INLINE void transpose(unsigned char *src[], int in_p, } while (++idx8x8 < num_8x8_to_transpose); } -void vp9_loop_filter_vertical_edge_16_sse2(uint8_t *s, int p, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { +void vp9_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 16 * 8); unsigned char *src[2]; unsigned char *dst[2]; @@ -1613,8 +1607,8 @@ void vp9_loop_filter_vertical_edge_16_sse2(uint8_t *s, int p, transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16); // Loop filtering - vp9_loop_filter_horizontal_edge_16_sse2(t_dst + 4 * 16, 16, blimit0, limit0, - thresh0, blimit1, limit1, thresh1); + vp9_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0, + blimit1, limit1, thresh1); src[0] = t_dst; src[1] = t_dst + 8; dst[0] = s - 4; @@ -1624,11 +1618,10 @@ void vp9_loop_filter_vertical_edge_16_sse2(uint8_t *s, int p, transpose(src, 16, dst, p, 2); } -void vp9_mbloop_filter_vertical_edge_sse2(unsigned char *s, int p, - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh, - int count) { +void vp9_lpf_vertical_8_sse2(unsigned char *s, int p, + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh, int count) { DECLARE_ALIGNED_ARRAY(8, unsigned char, t_dst, 8 * 8); unsigned char *src[1]; unsigned char *dst[1]; @@ -1641,8 +1634,7 @@ void vp9_mbloop_filter_vertical_edge_sse2(unsigned char *s, int p, transpose(src, p, dst, 8, 1); // Loop filtering - vp9_mbloop_filter_horizontal_edge_sse2(t_dst + 4 * 8, 8, blimit, limit, - thresh, 1); + vp9_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1); src[0] = t_dst; dst[0] = s - 4; @@ -1651,13 +1643,12 @@ void vp9_mbloop_filter_vertical_edge_sse2(unsigned char *s, int p, transpose(src, 8, dst, p, 1); } -void vp9_mbloop_filter_vertical_edge_16_sse2(uint8_t *s, int p, - const uint8_t *blimit0, - const uint8_t *limit0, - const uint8_t *thresh0, - const uint8_t *blimit1, - const uint8_t *limit1, - const uint8_t *thresh1) { +void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0, + const uint8_t *limit0, + const uint8_t *thresh0, + const uint8_t *blimit1, + const uint8_t *limit1, + const uint8_t *thresh1) { DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 16 * 8); unsigned char *src[2]; unsigned char *dst[2]; @@ -1666,8 +1657,8 @@ void vp9_mbloop_filter_vertical_edge_16_sse2(uint8_t *s, int p, transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16); // Loop filtering - vp9_mbloop_filter_horizontal_edge_16_sse2(t_dst + 4 * 16, 16, blimit0, limit0, - thresh0, blimit1, limit1, thresh1); + vp9_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0, + blimit1, limit1, thresh1); src[0] = t_dst; src[1] = t_dst + 8; @@ -1678,10 +1669,10 @@ void vp9_mbloop_filter_vertical_edge_16_sse2(uint8_t *s, int p, transpose(src, 16, dst, p, 2); } -void vp9_mb_lpf_vertical_edge_w_sse2(unsigned char *s, int p, - const unsigned char *blimit, - const unsigned char *limit, - const unsigned char *thresh) { +void vp9_lpf_vertical_16_sse2(unsigned char *s, int p, + const unsigned char *blimit, + const unsigned char *limit, + const unsigned char *thresh) { DECLARE_ALIGNED_ARRAY(8, unsigned char, t_dst, 8 * 16); unsigned char *src[2]; unsigned char *dst[2]; @@ -1706,10 +1697,9 @@ void vp9_mb_lpf_vertical_edge_w_sse2(unsigned char *s, int p, transpose(src, 8, dst, p, 2); } -void vp9_mb_lpf_vertical_edge_w_16_sse2(unsigned char *s, int p, - const uint8_t *blimit, - const uint8_t *limit, - const uint8_t *thresh) { +void vp9_lpf_vertical_16_dual_sse2(unsigned char *s, int p, + const uint8_t *blimit, const uint8_t *limit, + const uint8_t *thresh) { DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 256); // Transpose 16x16 diff --git a/vp9/common/x86/vp9_loopfilter_mmx.asm b/vp9/common/x86/vp9_loopfilter_mmx.asm index 4ebb51b77..a7f69307d 100644 --- a/vp9/common/x86/vp9_loopfilter_mmx.asm +++ b/vp9/common/x86/vp9_loopfilter_mmx.asm @@ -12,7 +12,7 @@ %include "vpx_ports/x86_abi_support.asm" -;void vp9_loop_filter_horizontal_edge_mmx +;void vp9_lpf_horizontal_4_mmx ;( ; unsigned char *src_ptr, ; int src_pixel_step, @@ -21,8 +21,8 @@ ; const char *thresh, ; int count ;) -global sym(vp9_loop_filter_horizontal_edge_mmx) PRIVATE -sym(vp9_loop_filter_horizontal_edge_mmx): +global sym(vp9_lpf_horizontal_4_mmx) PRIVATE +sym(vp9_lpf_horizontal_4_mmx): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 @@ -224,7 +224,7 @@ sym(vp9_loop_filter_horizontal_edge_mmx): ret -;void vp9_loop_filter_vertical_edge_mmx +;void vp9_lpf_vertical_4_mmx ;( ; unsigned char *src_ptr, ; int src_pixel_step, @@ -233,8 +233,8 @@ sym(vp9_loop_filter_horizontal_edge_mmx): ; const char *thresh, ; int count ;) -global sym(vp9_loop_filter_vertical_edge_mmx) PRIVATE -sym(vp9_loop_filter_vertical_edge_mmx): +global sym(vp9_lpf_vertical_4_mmx) PRIVATE +sym(vp9_lpf_vertical_4_mmx): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6