; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_loop_filter_horizontal_edge_16_neon|
+ EXPORT |vp9_lpf_horizontal_4_dual_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp9_loop_filter_horizontal_edge_16_neon(uint8_t *s, int p,
-; const uint8_t *blimit0,
-; const uint8_t *limit0,
-; const uint8_t *thresh0,
-; const uint8_t *blimit1,
-; const uint8_t *limit1,
-; const uint8_t *thresh1)
+;void vp9_lpf_horizontal_4_dual_neon(uint8_t *s, int p,
+; const uint8_t *blimit0,
+; const uint8_t *limit0,
+; const uint8_t *thresh0,
+; const uint8_t *blimit1,
+; const uint8_t *limit1,
+; const uint8_t *thresh1)
; r0 uint8_t *s,
; r1 int p,
; r2 const uint8_t *blimit0,
; sp+8 const uint8_t *limit1,
; sp+12 const uint8_t *thresh1,
-|vp9_loop_filter_horizontal_edge_16_neon| PROC
+|vp9_lpf_horizontal_4_dual_neon| PROC
push {lr}
ldr r12, [sp, #4] ; load thresh0
vpop {d8-d15} ; restore neon registers
pop {pc}
- ENDP ; |vp9_loop_filter_horizontal_edge_16_neon|
+ ENDP ; |vp9_lpf_horizontal_4_dual_neon|
; void vp9_loop_filter_neon_16();
; This is a helper function for the loopfilters. The invidual functions do the
#include "./vp9_rtcd.h"
-void vp9_mbloop_filter_horizontal_edge_16_neon(uint8_t *s, int p /* pitch */,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- vp9_mbloop_filter_horizontal_edge(s, p, blimit0, limit0, thresh0, 1);
- vp9_mbloop_filter_horizontal_edge(s + 8, p, blimit1, limit1, thresh1, 1);
+void vp9_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */,
+ const uint8_t *blimit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *blimit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_lpf_horizontal_8(s, p, blimit0, limit0, thresh0, 1);
+ vp9_lpf_horizontal_8(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_loop_filter_vertical_edge_16_neon(uint8_t *s, int p,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- vp9_loop_filter_vertical_edge_neon(s, p, blimit0, limit0, thresh0, 1);
- vp9_loop_filter_vertical_edge_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
+void vp9_lpf_vertical_4_dual_neon(uint8_t *s, int p,
+ const uint8_t *blimit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *blimit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1);
+ vp9_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
}
-void vp9_mbloop_filter_vertical_edge_16_neon(uint8_t *s, int p,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- vp9_mbloop_filter_vertical_edge_neon(s, p, blimit0, limit0, thresh0, 1);
- vp9_mbloop_filter_vertical_edge_neon(s + 8 * p, p, blimit1, limit1, thresh1,
- 1);
+void vp9_lpf_vertical_8_dual_neon(uint8_t *s, int p,
+ const uint8_t *blimit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *blimit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, 1);
+ vp9_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
}
-void vp9_mb_lpf_vertical_edge_w_16_neon(uint8_t *s, int p,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh) {
- vp9_mb_lpf_vertical_edge_w_neon(s, p, blimit, limit, thresh);
- vp9_mb_lpf_vertical_edge_w_neon(s + 8 * p, p, blimit, limit, thresh);
+void vp9_lpf_vertical_16_dual_neon(uint8_t *s, int p,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh) {
+ vp9_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
+ vp9_lpf_vertical_16_neon(s + 8 * p, p, blimit, limit, thresh);
}
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_loop_filter_horizontal_edge_neon|
- EXPORT |vp9_loop_filter_vertical_edge_neon|
- EXPORT |vp9_mbloop_filter_horizontal_edge_neon|
- EXPORT |vp9_mbloop_filter_vertical_edge_neon|
+ EXPORT |vp9_lpf_horizontal_4_neon|
+ EXPORT |vp9_lpf_vertical_4_neon|
+ EXPORT |vp9_lpf_horizontal_8_neon|
+ EXPORT |vp9_lpf_vertical_8_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
; TODO(fgalligan): See about removing the count code as this function is only
; called with a count of 1.
;
-; void vp9_loop_filter_horizontal_edge_neon(uint8_t *s,
-; int p /* pitch */,
-; const uint8_t *blimit,
-; const uint8_t *limit,
-; const uint8_t *thresh,
-; int count)
+; void vp9_lpf_horizontal_4_neon(uint8_t *s,
+; int p /* pitch */,
+; const uint8_t *blimit,
+; const uint8_t *limit,
+; const uint8_t *thresh,
+; int count)
;
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
-|vp9_loop_filter_horizontal_edge_neon| PROC
+|vp9_lpf_horizontal_4_neon| PROC
push {lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
end_vp9_lf_h_edge
pop {pc}
- ENDP ; |vp9_loop_filter_horizontal_edge_neon|
+ ENDP ; |vp9_lpf_horizontal_4_neon|
; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter
; works on 16 iterations at a time.
; TODO(fgalligan): See about removing the count code as this function is only
; called with a count of 1.
;
-; void vp9_loop_filter_vertical_edge_neon(uint8_t *s,
-; int p /* pitch */,
-; const uint8_t *blimit,
-; const uint8_t *limit,
-; const uint8_t *thresh,
-; int count)
+; void vp9_lpf_vertical_4_neon(uint8_t *s,
+; int p /* pitch */,
+; const uint8_t *blimit,
+; const uint8_t *limit,
+; const uint8_t *thresh,
+; int count)
;
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
-|vp9_loop_filter_vertical_edge_neon| PROC
+|vp9_lpf_vertical_4_neon| PROC
push {lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
end_vp9_lf_v_edge
pop {pc}
- ENDP ; |vp9_loop_filter_vertical_edge_neon|
+ ENDP ; |vp9_lpf_vertical_4_neon|
; void vp9_loop_filter_neon();
; This is a helper function for the loopfilters. The invidual functions do the
bx lr
ENDP ; |vp9_loop_filter_neon|
-; void vp9_mbloop_filter_horizontal_edge_neon(uint8_t *s, int p,
-; const uint8_t *blimit,
-; const uint8_t *limit,
-; const uint8_t *thresh,
-; int count)
+; void vp9_lpf_horizontal_8_neon(uint8_t *s, int p,
+; const uint8_t *blimit,
+; const uint8_t *limit,
+; const uint8_t *thresh,
+; int count)
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
-|vp9_mbloop_filter_horizontal_edge_neon| PROC
+|vp9_lpf_horizontal_8_neon| PROC
push {r4-r5, lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
end_vp9_mblf_h_edge
pop {r4-r5, pc}
- ENDP ; |vp9_mbloop_filter_horizontal_edge_neon|
+ ENDP ; |vp9_lpf_horizontal_8_neon|
-; void vp9_mbloop_filter_vertical_edge_neon(uint8_t *s,
-; int pitch,
-; const uint8_t *blimit,
-; const uint8_t *limit,
-; const uint8_t *thresh,
-; int count)
+; void vp9_lpf_vertical_8_neon(uint8_t *s,
+; int pitch,
+; const uint8_t *blimit,
+; const uint8_t *limit,
+; const uint8_t *thresh,
+; int count)
;
; r0 uint8_t *s,
; r1 int pitch,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
-|vp9_mbloop_filter_vertical_edge_neon| PROC
+|vp9_lpf_vertical_8_neon| PROC
push {r4-r5, lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
end_vp9_mblf_v_edge
pop {r4-r5, pc}
- ENDP ; |vp9_mbloop_filter_vertical_edge_neon|
+ ENDP ; |vp9_lpf_vertical_8_neon|
; void vp9_mbloop_filter_neon();
; This is a helper function for the loopfilters. The invidual functions do the
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_mb_lpf_horizontal_edge_w_neon|
- EXPORT |vp9_mb_lpf_vertical_edge_w_neon|
+ EXPORT |vp9_lpf_horizontal_16_neon|
+ EXPORT |vp9_lpf_vertical_16_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
-; void vp9_mb_lpf_horizontal_edge_w_neon(uint8_t *s, int p,
-; const uint8_t *blimit,
-; const uint8_t *limit,
-; const uint8_t *thresh
-; int count)
+; void vp9_lpf_horizontal_16_neon(uint8_t *s, int p,
+; const uint8_t *blimit,
+; const uint8_t *limit,
+; const uint8_t *thresh
+; int count)
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
-|vp9_mb_lpf_horizontal_edge_w_neon| PROC
+|vp9_lpf_horizontal_16_neon| PROC
push {r4-r8, lr}
vpush {d8-d15}
ldr r4, [sp, #88] ; load thresh
vpop {d8-d15}
pop {r4-r8, pc}
- ENDP ; |vp9_mb_lpf_horizontal_edge_w_neon|
+ ENDP ; |vp9_lpf_horizontal_16_neon|
-; void vp9_mb_lpf_vertical_edge_w_neon(uint8_t *s, int p,
-; const uint8_t *blimit,
-; const uint8_t *limit,
-; const uint8_t *thresh)
+; void vp9_lpf_vertical_16_neon(uint8_t *s, int p,
+; const uint8_t *blimit,
+; const uint8_t *limit,
+; const uint8_t *thresh)
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
-|vp9_mb_lpf_vertical_edge_w_neon| PROC
+|vp9_lpf_vertical_16_neon| PROC
push {r4-r8, lr}
vpush {d8-d15}
ldr r4, [sp, #88] ; load thresh
vpop {d8-d15}
pop {r4-r8, pc}
- ENDP ; |vp9_mb_lpf_vertical_edge_w_neon|
+ ENDP ; |vp9_lpf_vertical_16_neon|
; void vp9_wide_mbfilter_neon();
; This is a helper function for the loopfilters. The invidual functions do the
#include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h"
#if HAVE_DSPR2
-void vp9_loop_filter_horizontal_edge_dspr2(unsigned char *s,
- int pitch,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+void vp9_lpf_horizontal_4_dspr2(unsigned char *s,
+ int pitch,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh,
+ int count) {
uint8_t i;
uint32_t mask;
uint32_t hev;
}
}
-void vp9_loop_filter_vertical_edge_dspr2(unsigned char *s,
- int pitch,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+void vp9_lpf_vertical_4_dspr2(unsigned char *s,
+ int pitch,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh,
+ int count) {
uint8_t i;
uint32_t mask, hev;
uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
}
}
-void vp9_loop_filter_horizontal_edge_16_dspr2(uint8_t *s, int p /* pitch */,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- vp9_loop_filter_horizontal_edge_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vp9_loop_filter_horizontal_edge_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
+void vp9_lpf_horizontal_4_dual_dspr2(uint8_t *s, int p /* pitch */,
+ const uint8_t *blimit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *blimit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
+ vp9_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_mbloop_filter_horizontal_edge_16_dspr2(uint8_t *s, int p /* pitch */,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- vp9_mbloop_filter_horizontal_edge_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vp9_mbloop_filter_horizontal_edge_dspr2(s + 8, p, blimit1, limit1, thresh1,
- 1);
+void vp9_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */,
+ const uint8_t *blimit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *blimit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
+ vp9_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_loop_filter_vertical_edge_16_dspr2(uint8_t *s, int p,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- vp9_loop_filter_vertical_edge_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vp9_loop_filter_vertical_edge_dspr2(s + 8 * p, p, blimit1, limit1, thresh1,
- 1);
+void vp9_lpf_vertical_4_dual_dspr2(uint8_t *s, int p,
+ const uint8_t *blimit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *blimit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
+ vp9_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1);
}
-void vp9_mbloop_filter_vertical_edge_16_dspr2(uint8_t *s, int p,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- vp9_mbloop_filter_vertical_edge_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vp9_mbloop_filter_vertical_edge_dspr2(s + 8 * p, p, blimit1, limit1, thresh1,
+void vp9_lpf_vertical_8_dual_dspr2(uint8_t *s, int p,
+ const uint8_t *blimit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *blimit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
+ vp9_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1,
1);
}
-void vp9_mb_lpf_vertical_edge_w_16_dspr2(uint8_t *s, int p,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh) {
- vp9_mb_lpf_vertical_edge_w_dspr2(s, p, blimit, limit, thresh);
- vp9_mb_lpf_vertical_edge_w_dspr2(s + 8 * p, p, blimit, limit, thresh);
+void vp9_lpf_vertical_16_dual_dspr2(uint8_t *s, int p,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh) {
+ vp9_lpf_vertical_16_dspr2(s, p, blimit, limit, thresh);
+ vp9_lpf_vertical_16_dspr2(s + 8 * p, p, blimit, limit, thresh);
}
#endif // #if HAVE_DSPR2
#include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h"
#if HAVE_DSPR2
-void vp9_mbloop_filter_horizontal_edge_dspr2(unsigned char *s,
- int pitch,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+void vp9_lpf_horizontal_8_dspr2(unsigned char *s,
+ int pitch,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh,
+ int count) {
uint32_t mask;
uint32_t hev, flat;
uint8_t i;
}
}
-void vp9_mbloop_filter_vertical_edge_dspr2(unsigned char *s,
- int pitch,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+void vp9_lpf_vertical_8_dspr2(unsigned char *s,
+ int pitch,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh,
+ int count) {
uint8_t i;
uint32_t mask, hev, flat;
uint8_t *s1, *s2, *s3, *s4;
#include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h"
#if HAVE_DSPR2
-void vp9_mb_lpf_horizontal_edge_w_dspr2(unsigned char *s,
- int pitch,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+void vp9_lpf_horizontal_16_dspr2(unsigned char *s,
+ int pitch,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh,
+ int count) {
uint32_t mask;
uint32_t hev, flat, flat2;
uint8_t i;
#include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h"
#if HAVE_DSPR2
-void vp9_mb_lpf_vertical_edge_w_dspr2(uint8_t *s,
- int pitch,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh) {
+void vp9_lpf_vertical_16_dspr2(uint8_t *s,
+ int pitch,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh) {
uint8_t i;
uint32_t mask, hev, flat, flat2;
uint8_t *s1, *s2, *s3, *s4;
if (mask & 1) {
if ((mask_16x16_0 | mask_16x16_1) & 1) {
if ((mask_16x16_0 & mask_16x16_1) & 1) {
- vp9_mb_lpf_vertical_edge_w_16(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr);
+ vp9_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr);
} else if (mask_16x16_0 & 1) {
- vp9_mb_lpf_vertical_edge_w(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr);
+ vp9_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr);
} else {
- vp9_mb_lpf_vertical_edge_w(s + 8 *pitch, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr);
+ vp9_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
+ lfi1->lim, lfi1->hev_thr);
}
}
if ((mask_8x8_0 | mask_8x8_1) & 1) {
if ((mask_8x8_0 & mask_8x8_1) & 1) {
- vp9_mbloop_filter_vertical_edge_16(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr);
+ vp9_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr);
} else if (mask_8x8_0 & 1) {
- vp9_mbloop_filter_vertical_edge(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, 1);
+ vp9_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
+ 1);
} else {
- vp9_mbloop_filter_vertical_edge(s + 8 *pitch, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, 1);
+ vp9_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr, 1);
}
}
if ((mask_4x4_0 | mask_4x4_1) & 1) {
if ((mask_4x4_0 & mask_4x4_1) & 1) {
- vp9_loop_filter_vertical_edge_16(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr);
+ vp9_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr);
} else if (mask_4x4_0 & 1) {
- vp9_loop_filter_vertical_edge(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, 1);
+ vp9_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
+ 1);
} else {
- vp9_loop_filter_vertical_edge(s + 8 *pitch, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, 1);
+ vp9_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr, 1);
}
}
if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
- vp9_loop_filter_vertical_edge_16(s + 4, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr);
+ vp9_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr);
} else if (mask_4x4_int_0 & 1) {
- vp9_loop_filter_vertical_edge(s + 4, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, 1);
+ vp9_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, 1);
} else {
- vp9_loop_filter_vertical_edge(s + 8 *pitch + 4, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, 1);
+ vp9_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr, 1);
}
}
}
if (mask & 1) {
if (mask_16x16 & 1) {
if ((mask_16x16 & 3) == 3) {
- vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 2);
+ vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, 2);
count = 2;
} else {
- vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, 1);
}
} else if (mask_8x8 & 1) {
if ((mask_8x8 & 3) == 3) {
// Next block's thresholds
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
- vp9_mbloop_filter_horizontal_edge_16(s, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr,
- lfin->mblim, lfin->lim,
- lfin->hev_thr);
+ vp9_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, lfin->mblim, lfin->lim,
+ lfin->hev_thr);
if ((mask_4x4_int & 3) == 3) {
- vp9_loop_filter_horizontal_edge_16(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr,
- lfin->mblim, lfin->lim,
- lfin->hev_thr);
+ vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr, lfin->mblim,
+ lfin->lim, lfin->hev_thr);
} else {
if (mask_4x4_int & 1)
- vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1);
+ vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, 1);
else if (mask_4x4_int & 2)
- vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch,
- lfin->mblim, lfin->lim,
- lfin->hev_thr, 1);
+ vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+ lfin->lim, lfin->hev_thr, 1);
}
count = 2;
} else {
- vp9_mbloop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ vp9_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (mask_4x4_int & 1)
- vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1);
+ vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, 1);
}
} else if (mask_4x4 & 1) {
if ((mask_4x4 & 3) == 3) {
// Next block's thresholds
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
- vp9_loop_filter_horizontal_edge_16(s, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr,
- lfin->mblim, lfin->lim,
- lfin->hev_thr);
+ vp9_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, lfin->mblim, lfin->lim,
+ lfin->hev_thr);
if ((mask_4x4_int & 3) == 3) {
- vp9_loop_filter_horizontal_edge_16(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr,
- lfin->mblim, lfin->lim,
- lfin->hev_thr);
+ vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr, lfin->mblim,
+ lfin->lim, lfin->hev_thr);
} else {
if (mask_4x4_int & 1)
- vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1);
+ vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, 1);
else if (mask_4x4_int & 2)
- vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch,
- lfin->mblim, lfin->lim,
- lfin->hev_thr, 1);
+ vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+ lfin->lim, lfin->hev_thr, 1);
}
count = 2;
} else {
- vp9_loop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ vp9_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
- if (mask_4x4_int & 1)
- vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1);
+ if (mask_4x4_int & 1)
+ vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, 1);
}
} else if (mask_4x4_int & 1) {
- vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1);
+ vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, 1);
}
}
s += 8 * count;
if (mask & 1) {
if (mask_16x16 & 1) {
- vp9_mb_lpf_vertical_edge_w(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr);
+ vp9_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
} else if (mask_8x8 & 1) {
- vp9_mbloop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ vp9_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
} else if (mask_4x4 & 1) {
- vp9_loop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ vp9_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
}
if (mask_4x4_int & 1)
- vp9_loop_filter_vertical_edge(s + 4, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ vp9_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
s += 8;
lfl += 1;
mask_16x16 >>= 1;
*op1 = signed_char_clamp(ps1 + filter) ^ 0x80;
}
-void vp9_loop_filter_horizontal_edge_c(uint8_t *s, int p /* pitch */,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+void vp9_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
+ const uint8_t *blimit, const uint8_t *limit,
+ const uint8_t *thresh, int count) {
int i;
// loop filter designed to work using chars so that we can make maximum use
}
}
-void vp9_loop_filter_horizontal_edge_16_c(uint8_t *s, int p,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- vp9_loop_filter_horizontal_edge_c(s, p, blimit0, limit0, thresh0, 1);
- vp9_loop_filter_horizontal_edge_c(s + 8, p, blimit1, limit1, thresh1, 1);
+void vp9_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
+ const uint8_t *limit0, const uint8_t *thresh0,
+ const uint8_t *blimit1, const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1);
+ vp9_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_loop_filter_vertical_edge_c(uint8_t *s, int pitch,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+void vp9_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh,
+ int count) {
int i;
// loop filter designed to work using chars so that we can make maximum use
}
}
-void vp9_loop_filter_vertical_edge_16_c(uint8_t *s, int pitch,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- vp9_loop_filter_vertical_edge_c(s, pitch, blimit0, limit0, thresh0, 1);
- vp9_loop_filter_vertical_edge_c(s + 8 * pitch, pitch, blimit1, limit1,
+void vp9_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
+ const uint8_t *limit0, const uint8_t *thresh0,
+ const uint8_t *blimit1, const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1);
+ vp9_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
thresh1, 1);
}
}
}
-void vp9_mbloop_filter_horizontal_edge_c(uint8_t *s, int p,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+void vp9_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh,
+ int count) {
int i;
// loop filter designed to work using chars so that we can make maximum use
}
}
-void vp9_mbloop_filter_horizontal_edge_16_c(uint8_t *s, int p,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- vp9_mbloop_filter_horizontal_edge_c(s, p, blimit0, limit0, thresh0, 1);
- vp9_mbloop_filter_horizontal_edge_c(s + 8, p, blimit1, limit1, thresh1, 1);
+void vp9_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
+ const uint8_t *limit0, const uint8_t *thresh0,
+ const uint8_t *blimit1, const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1);
+ vp9_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_mbloop_filter_vertical_edge_c(uint8_t *s, int pitch,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+void vp9_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh,
+ int count) {
int i;
for (i = 0; i < 8 * count; ++i) {
}
}
-void vp9_mbloop_filter_vertical_edge_16_c(uint8_t *s, int pitch,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- vp9_mbloop_filter_vertical_edge_c(s, pitch, blimit0, limit0, thresh0, 1);
- vp9_mbloop_filter_vertical_edge_c(s + 8 * pitch, pitch, blimit1, limit1,
+void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
+ const uint8_t *limit0, const uint8_t *thresh0,
+ const uint8_t *blimit1, const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1);
+ vp9_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
thresh1, 1);
}
}
}
-void vp9_mb_lpf_horizontal_edge_w_c(uint8_t *s, int p,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+void vp9_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh,
+ int count) {
int i;
// loop filter designed to work using chars so that we can make maximum use
}
}
-void vp9_mb_lpf_vertical_edge_w_c(uint8_t *s, int p,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh) {
+void vp9_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh) {
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8);
}
-void vp9_mb_lpf_vertical_edge_w_16_c(uint8_t *s, int p,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh) {
+void vp9_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh) {
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16);
}
#
# Loopfilter
#
-prototype void vp9_mb_lpf_vertical_edge_w "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"
-specialize vp9_mb_lpf_vertical_edge_w sse2 neon dspr2
+prototype void vp9_lpf_vertical_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"
+specialize vp9_lpf_vertical_16 sse2 neon dspr2
-prototype void vp9_mb_lpf_vertical_edge_w_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"
-specialize vp9_mb_lpf_vertical_edge_w_16 sse2 neon dspr2
+prototype void vp9_lpf_vertical_16_dual "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"
+specialize vp9_lpf_vertical_16_dual sse2 neon dspr2
-prototype void vp9_mbloop_filter_vertical_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_mbloop_filter_vertical_edge sse2 neon dspr2
+prototype void vp9_lpf_vertical_8 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
+specialize vp9_lpf_vertical_8 sse2 neon dspr2
-prototype void vp9_mbloop_filter_vertical_edge_16 "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
-specialize vp9_mbloop_filter_vertical_edge_16 sse2 neon dspr2
+prototype void vp9_lpf_vertical_8_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
+specialize vp9_lpf_vertical_8_dual sse2 neon dspr2
-prototype void vp9_loop_filter_vertical_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_loop_filter_vertical_edge mmx neon dspr2
+prototype void vp9_lpf_vertical_4 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
+specialize vp9_lpf_vertical_4 mmx neon dspr2
-prototype void vp9_loop_filter_vertical_edge_16 "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
-specialize vp9_loop_filter_vertical_edge_16 sse2 neon dspr2
+prototype void vp9_lpf_vertical_4_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
+specialize vp9_lpf_vertical_4_dual sse2 neon dspr2
-prototype void vp9_mb_lpf_horizontal_edge_w "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_mb_lpf_horizontal_edge_w sse2 avx2 neon dspr2
+prototype void vp9_lpf_horizontal_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
+specialize vp9_lpf_horizontal_16 sse2 avx2 neon dspr2
-prototype void vp9_mbloop_filter_horizontal_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_mbloop_filter_horizontal_edge sse2 neon dspr2
+prototype void vp9_lpf_horizontal_8 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
+specialize vp9_lpf_horizontal_8 sse2 neon dspr2
-prototype void vp9_mbloop_filter_horizontal_edge_16 "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
-specialize vp9_mbloop_filter_horizontal_edge_16 sse2 neon dspr2
+prototype void vp9_lpf_horizontal_8_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
+specialize vp9_lpf_horizontal_8_dual sse2 neon dspr2
-prototype void vp9_loop_filter_horizontal_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_loop_filter_horizontal_edge mmx neon dspr2
+prototype void vp9_lpf_horizontal_4 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
+specialize vp9_lpf_horizontal_4 mmx neon dspr2
-prototype void vp9_loop_filter_horizontal_edge_16 "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
-specialize vp9_loop_filter_horizontal_edge_16 sse2 neon dspr2
+prototype void vp9_lpf_horizontal_4_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
+specialize vp9_lpf_horizontal_4_dual sse2 neon dspr2
#
# post proc
}
}
-void vp9_mb_lpf_horizontal_edge_w_avx2(unsigned char *s, int p,
+void vp9_lpf_horizontal_16_avx2(unsigned char *s, int p,
const unsigned char *_blimit, const unsigned char *_limit,
const unsigned char *_thresh, int count) {
if (count == 1)
}
// TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly.
-void vp9_mb_lpf_horizontal_edge_w_sse2(unsigned char *s,
- int p,
- const unsigned char *_blimit,
- const unsigned char *_limit,
- const unsigned char *_thresh,
- int count) {
+void vp9_lpf_horizontal_16_sse2(unsigned char *s, int p,
+ const unsigned char *_blimit,
+ const unsigned char *_limit,
+ const unsigned char *_thresh, int count) {
if (count == 1)
mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh);
else
mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh);
}
-void vp9_mbloop_filter_horizontal_edge_sse2(unsigned char *s,
- int p,
- const unsigned char *_blimit,
- const unsigned char *_limit,
- const unsigned char *_thresh,
- int count) {
+void vp9_lpf_horizontal_8_sse2(unsigned char *s, int p,
+ const unsigned char *_blimit,
+ const unsigned char *_limit,
+ const unsigned char *_thresh, int count) {
DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op2, 16);
DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op1, 16);
DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op0, 16);
}
}
-void vp9_mbloop_filter_horizontal_edge_16_sse2(uint8_t *s, int p,
- const uint8_t *_blimit0,
- const uint8_t *_limit0,
- const uint8_t *_thresh0,
- const uint8_t *_blimit1,
- const uint8_t *_limit1,
- const uint8_t *_thresh1) {
+void vp9_lpf_horizontal_8_dual_sse2(uint8_t *s, int p,
+ const uint8_t *_blimit0,
+ const uint8_t *_limit0,
+ const uint8_t *_thresh0,
+ const uint8_t *_blimit1,
+ const uint8_t *_limit1,
+ const uint8_t *_thresh1) {
DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op2, 16);
DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op1, 16);
DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op0, 16);
}
}
-void vp9_loop_filter_horizontal_edge_16_sse2(unsigned char *s,
- int p,
- const unsigned char *_blimit0,
- const unsigned char *_limit0,
- const unsigned char *_thresh0,
- const unsigned char *_blimit1,
- const unsigned char *_limit1,
- const unsigned char *_thresh1) {
+void vp9_lpf_horizontal_4_dual_sse2(unsigned char *s, int p,
+ const unsigned char *_blimit0,
+ const unsigned char *_limit0,
+ const unsigned char *_thresh0,
+ const unsigned char *_blimit1,
+ const unsigned char *_limit1,
+ const unsigned char *_thresh1) {
const __m128i blimit =
_mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_blimit0),
_mm_load_si128((const __m128i *)_blimit1));
} while (++idx8x8 < num_8x8_to_transpose);
}
-void vp9_loop_filter_vertical_edge_16_sse2(uint8_t *s, int p,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
+void vp9_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *blimit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 16 * 8);
unsigned char *src[2];
unsigned char *dst[2];
transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
// Loop filtering
- vp9_loop_filter_horizontal_edge_16_sse2(t_dst + 4 * 16, 16, blimit0, limit0,
- thresh0, blimit1, limit1, thresh1);
+ vp9_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
+ blimit1, limit1, thresh1);
src[0] = t_dst;
src[1] = t_dst + 8;
dst[0] = s - 4;
transpose(src, 16, dst, p, 2);
}
-void vp9_mbloop_filter_vertical_edge_sse2(unsigned char *s, int p,
- const unsigned char *blimit,
- const unsigned char *limit,
- const unsigned char *thresh,
- int count) {
+void vp9_lpf_vertical_8_sse2(unsigned char *s, int p,
+ const unsigned char *blimit,
+ const unsigned char *limit,
+ const unsigned char *thresh, int count) {
DECLARE_ALIGNED_ARRAY(8, unsigned char, t_dst, 8 * 8);
unsigned char *src[1];
unsigned char *dst[1];
transpose(src, p, dst, 8, 1);
// Loop filtering
- vp9_mbloop_filter_horizontal_edge_sse2(t_dst + 4 * 8, 8, blimit, limit,
- thresh, 1);
+ vp9_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1);
src[0] = t_dst;
dst[0] = s - 4;
transpose(src, 8, dst, p, 1);
}
-void vp9_mbloop_filter_vertical_edge_16_sse2(uint8_t *s, int p,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
+void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *blimit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 16 * 8);
unsigned char *src[2];
unsigned char *dst[2];
transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
// Loop filtering
- vp9_mbloop_filter_horizontal_edge_16_sse2(t_dst + 4 * 16, 16, blimit0, limit0,
- thresh0, blimit1, limit1, thresh1);
+ vp9_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
+ blimit1, limit1, thresh1);
src[0] = t_dst;
src[1] = t_dst + 8;
transpose(src, 16, dst, p, 2);
}
-void vp9_mb_lpf_vertical_edge_w_sse2(unsigned char *s, int p,
- const unsigned char *blimit,
- const unsigned char *limit,
- const unsigned char *thresh) {
+void vp9_lpf_vertical_16_sse2(unsigned char *s, int p,
+ const unsigned char *blimit,
+ const unsigned char *limit,
+ const unsigned char *thresh) {
DECLARE_ALIGNED_ARRAY(8, unsigned char, t_dst, 8 * 16);
unsigned char *src[2];
unsigned char *dst[2];
transpose(src, 8, dst, p, 2);
}
-void vp9_mb_lpf_vertical_edge_w_16_sse2(unsigned char *s, int p,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh) {
+void vp9_lpf_vertical_16_dual_sse2(unsigned char *s, int p,
+ const uint8_t *blimit, const uint8_t *limit,
+ const uint8_t *thresh) {
DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 256);
// Transpose 16x16
%include "vpx_ports/x86_abi_support.asm"
-;void vp9_loop_filter_horizontal_edge_mmx
+;void vp9_lpf_horizontal_4_mmx
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *thresh,
; int count
;)
-global sym(vp9_loop_filter_horizontal_edge_mmx) PRIVATE
-sym(vp9_loop_filter_horizontal_edge_mmx):
+global sym(vp9_lpf_horizontal_4_mmx) PRIVATE
+sym(vp9_lpf_horizontal_4_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
ret
-;void vp9_loop_filter_vertical_edge_mmx
+;void vp9_lpf_vertical_4_mmx
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *thresh,
; int count
;)
-global sym(vp9_loop_filter_vertical_edge_mmx) PRIVATE
-sym(vp9_loop_filter_vertical_edge_mmx):
+global sym(vp9_lpf_vertical_4_mmx) PRIVATE
+sym(vp9_lpf_vertical_4_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6