From 01433c50436a669b7e10faf94382dbe03a8827bf Mon Sep 17 00:00:00 2001 From: Johann Date: Mon, 20 Jun 2011 14:48:57 -0400 Subject: [PATCH] update x86 asm for loopfilter Change-Id: I1ed739522db7c00c189851c7095c1b64ef6412ce --- vp8/common/x86/loopfilter_mmx.asm | 78 +++++------- vp8/common/x86/loopfilter_sse2.asm | 63 ++++------ vp8/common/x86/loopfilter_x86.c | 170 +++++++-------------------- vp8/common/x86/loopfilter_x86.h | 24 ++-- vp8/common/x86/x86_systemdependent.c | 10 +- 5 files changed, 111 insertions(+), 234 deletions(-) diff --git a/vp8/common/x86/loopfilter_mmx.asm b/vp8/common/x86/loopfilter_mmx.asm index c6c215c3c..ad47284cf 100644 --- a/vp8/common/x86/loopfilter_mmx.asm +++ b/vp8/common/x86/loopfilter_mmx.asm @@ -16,7 +16,7 @@ ;( ; unsigned char *src_ptr, ; int src_pixel_step, -; const char *flimit, +; const char *blimit, ; const char *limit, ; const char *thresh, ; int count @@ -122,12 +122,10 @@ next8_h: paddusb mm5, mm5 ; abs(p0-q0)*2 paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - mov rdx, arg(2) ;flimit ; get flimit - movq mm2, [rdx] ; flimit mm2 - paddb mm2, mm2 ; flimit*2 (less than 255) - paddb mm7, mm2 ; flimit * 2 + limit (less than 255) + mov rdx, arg(2) ;blimit ; get blimit + movq mm7, [rdx] ; blimit - psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit + psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit por mm1, mm5 pxor mm5, mm5 pcmpeqb mm1, mm5 ; mask mm1 @@ -230,7 +228,7 @@ next8_h: ;( ; unsigned char *src_ptr, ; int src_pixel_step, -; const char *flimit, +; const char *blimit, ; const char *limit, ; const char *thresh, ; int count @@ -406,9 +404,9 @@ next8_v: pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw mm5, 1 ; abs(p1-q1)/2 - mov rdx, arg(2) ;flimit ; + mov rdx, arg(2) ;blimit ; - movq mm2, [rdx] ;flimit mm2 + movq mm4, [rdx] ;blimit movq mm1, mm3 ; mm1=mm3=p0 movq mm7, mm6 ; mm7=mm6=q0 @@ -419,10 +417,7 @@ next8_v: paddusb mm1, mm1 ; abs(q0-p0)*2 paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - paddb mm2, mm2 ; flimit*2 (less than 255) - paddb mm4, mm2 ; flimit * 2 + limit (less than 255) - - psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit + psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit por mm1, mm0; ; mask pxor mm0, mm0 @@ -603,7 +598,7 @@ next8_v: ;( ; unsigned char *src_ptr, ; int src_pixel_step, -; const char *flimit, +; const char *blimit, ; const char *limit, ; const char *thresh, ; int count @@ -719,17 +714,15 @@ next8_mbh: paddusb mm5, mm5 ; abs(p0-q0)*2 paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - mov rdx, arg(2) ;flimit ; get flimit - movq mm2, [rdx] ; flimit mm2 - paddb mm2, mm2 ; flimit*2 (less than 255) - paddb mm7, mm2 ; flimit * 2 + limit (less than 255) + mov rdx, arg(2) ;blimit ; get blimit + movq mm7, [rdx] ; blimit - psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit + psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit por mm1, mm5 pxor mm5, mm5 pcmpeqb mm1, mm5 ; mask mm1 - ; mm1 = mask, mm0=q0, mm7 = flimit, t0 = abs(q0-q1) t1 = abs(p1-p0) + ; mm1 = mask, mm0=q0, mm7 = blimit, t0 = abs(q0-q1) t1 = abs(p1-p0) ; mm6 = p0, ; calculate high edge variance @@ -922,7 +915,7 @@ next8_mbh: ;( ; unsigned char *src_ptr, ; int src_pixel_step, -; const char *flimit, +; const char *blimit, ; const char *limit, ; const char *thresh, ; int count @@ -1108,9 +1101,9 @@ next8_mbv: pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw mm5, 1 ; abs(p1-q1)/2 - mov rdx, arg(2) ;flimit ; + mov rdx, arg(2) ;blimit ; - movq mm2, [rdx] ;flimit mm2 + movq mm4, [rdx] ;blimit movq mm1, mm3 ; mm1=mm3=p0 movq mm7, mm6 ; mm7=mm6=q0 @@ -1121,10 +1114,7 @@ next8_mbv: paddusb mm1, mm1 ; abs(q0-p0)*2 paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - paddb mm2, mm2 ; flimit*2 (less than 255) - paddb mm4, mm2 ; flimit * 2 + limit (less than 255) - - psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit + psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit por mm1, mm0; ; mask pxor mm0, mm0 @@ -1392,16 +1382,13 @@ next8_mbv: ;( ; unsigned char *src_ptr, ; int src_pixel_step, -; const char *flimit, -; const char *limit, -; const char *thresh, -; int count +; const char *blimit ;) global sym(vp8_loop_filter_simple_horizontal_edge_mmx) sym(vp8_loop_filter_simple_horizontal_edge_mmx): push rbp mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 + SHADOW_ARGS_TO_STACK 3 GET_GOT rbx push rsi push rdi @@ -1410,14 +1397,10 @@ sym(vp8_loop_filter_simple_horizontal_edge_mmx): mov rsi, arg(0) ;src_ptr movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? - movsxd rcx, dword ptr arg(5) ;count + mov rcx, 2 ; count nexts8_h: - mov rdx, arg(3) ;limit - movq mm7, [rdx] - mov rdx, arg(2) ;flimit ; get flimit + mov rdx, arg(2) ;blimit ; get blimit movq mm3, [rdx] ; - paddb mm3, mm3 ; flimit*2 (less than 255) - paddb mm3, mm7 ; flimit * 2 + limit (less than 255) mov rdi, rsi ; rdi points to row +1 for indirect addressing add rdi, rax @@ -1445,7 +1428,7 @@ nexts8_h: paddusb mm5, mm5 ; abs(p0-q0)*2 paddusb mm5, mm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit + psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit pxor mm3, mm3 pcmpeqb mm5, mm3 @@ -1515,16 +1498,13 @@ nexts8_h: ;( ; unsigned char *src_ptr, ; int src_pixel_step, -; const char *flimit, -; const char *limit, -; const char *thresh, -; int count +; const char *blimit ;) global sym(vp8_loop_filter_simple_vertical_edge_mmx) sym(vp8_loop_filter_simple_vertical_edge_mmx): push rbp mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 + SHADOW_ARGS_TO_STACK 3 GET_GOT rbx push rsi push rdi @@ -1539,7 +1519,7 @@ sym(vp8_loop_filter_simple_vertical_edge_mmx): movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? lea rsi, [rsi + rax*4- 2]; ; - movsxd rcx, dword ptr arg(5) ;count + mov rcx, 2 ; count nexts8_v: lea rdi, [rsi + rax]; @@ -1602,14 +1582,10 @@ nexts8_v: paddusb mm5, mm5 ; abs(p0-q0)*2 paddusb mm5, mm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - mov rdx, arg(2) ;flimit ; get flimit + mov rdx, arg(2) ;blimit ; get blimit movq mm7, [rdx] - mov rdx, arg(3) ; get limit - movq mm6, [rdx] - paddb mm7, mm7 ; flimit*2 (less than 255) - paddb mm7, mm6 ; flimit * 2 + limit (less than 255) - psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit + psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit pxor mm7, mm7 pcmpeqb mm5, mm7 ; mm5 = mask diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm index c2ce1a106..4efff7eb5 100644 --- a/vp8/common/x86/loopfilter_sse2.asm +++ b/vp8/common/x86/loopfilter_sse2.asm @@ -110,7 +110,7 @@ psubusb xmm6, xmm5 ; p1-=p0 por xmm6, xmm4 ; abs(p1 - p0) - mov rdx, arg(2) ; get flimit + mov rdx, arg(2) ; get blimit movdqa t1, xmm6 ; save to t1 @@ -123,7 +123,7 @@ psubusb xmm1, xmm7 por xmm2, xmm3 ; abs(p1-q1) - movdqa xmm4, XMMWORD PTR [rdx] ; flimit + movdqa xmm7, XMMWORD PTR [rdx] ; blimit movdqa xmm3, xmm0 ; q0 pand xmm2, [GLOBAL(tfe)] ; set lsb of each byte to zero @@ -134,13 +134,11 @@ psrlw xmm2, 1 ; abs(p1-q1)/2 psubusb xmm5, xmm3 ; p0-=q0 - paddb xmm4, xmm4 ; flimit*2 (less than 255) psubusb xmm3, xmm6 ; q0-=p0 por xmm5, xmm3 ; abs(p0 - q0) paddusb xmm5, xmm5 ; abs(p0-q0)*2 - paddb xmm7, xmm4 ; flimit * 2 + limit (less than 255) movdqa xmm4, t0 ; hev get abs (q1 - q0) @@ -150,7 +148,7 @@ movdqa xmm2, XMMWORD PTR [rdx] ; hev - psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit + psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit psubusb xmm4, xmm2 ; hev psubusb xmm3, xmm2 ; hev @@ -278,7 +276,7 @@ ;( ; unsigned char *src_ptr, ; int src_pixel_step, -; const char *flimit, +; const char *blimit, ; const char *limit, ; const char *thresh, ; int count @@ -328,7 +326,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2): ;( ; unsigned char *src_ptr, ; int src_pixel_step, -; const char *flimit, +; const char *blimit, ; const char *limit, ; const char *thresh, ; int count @@ -574,7 +572,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2): ;( ; unsigned char *src_ptr, ; int src_pixel_step, -; const char *flimit, +; const char *blimit, ; const char *limit, ; const char *thresh, ; int count @@ -624,7 +622,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2): ;( ; unsigned char *u, ; int src_pixel_step, -; const char *flimit, +; const char *blimit, ; const char *limit, ; const char *thresh, ; unsigned char *v @@ -904,7 +902,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): movdqa xmm4, XMMWORD PTR [rdx]; limit pmaxub xmm0, xmm7 - mov rdx, arg(2) ; flimit + mov rdx, arg(2) ; blimit psubusb xmm0, xmm4 movdqa xmm5, xmm2 ; q1 @@ -921,12 +919,11 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): psrlw xmm5, 1 ; abs(p1-q1)/2 psubusb xmm6, xmm3 ; q0-p0 - movdqa xmm2, XMMWORD PTR [rdx]; flimit + movdqa xmm4, XMMWORD PTR [rdx]; blimit mov rdx, arg(4) ; get thresh por xmm1, xmm6 ; abs(q0-p0) - paddb xmm2, xmm2 ; flimit*2 (less than 255) movdqa xmm6, t0 ; get abs (q1 - q0) @@ -939,10 +936,9 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): paddusb xmm1, xmm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2 psubusb xmm6, xmm7 ; abs(q1 - q0) > thresh - paddb xmm4, xmm2 ; flimit * 2 + limit (less than 255) psubusb xmm3, xmm7 ; abs(p1 - p0)> thresh - psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit + psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit por xmm6, xmm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh por xmm1, xmm0 ; mask @@ -1014,7 +1010,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): ;( ; unsigned char *src_ptr, ; int src_pixel_step, -; const char *flimit, +; const char *blimit, ; const char *limit, ; const char *thresh, ; int count @@ -1081,7 +1077,7 @@ sym(vp8_loop_filter_vertical_edge_sse2): ;( ; unsigned char *u, ; int src_pixel_step, -; const char *flimit, +; const char *blimit, ; const char *limit, ; const char *thresh, ; unsigned char *v @@ -1239,7 +1235,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2): ;( ; unsigned char *src_ptr, ; int src_pixel_step, -; const char *flimit, +; const char *blimit, ; const char *limit, ; const char *thresh, ; int count @@ -1308,7 +1304,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2): ;( ; unsigned char *u, ; int src_pixel_step, -; const char *flimit, +; const char *blimit, ; const char *limit, ; const char *thresh, ; unsigned char *v @@ -1376,16 +1372,13 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2): ;( ; unsigned char *src_ptr, ; int src_pixel_step, -; const char *flimit, -; const char *limit, -; const char *thresh, -; int count +; const char *blimit, ;) global sym(vp8_loop_filter_simple_horizontal_edge_sse2) sym(vp8_loop_filter_simple_horizontal_edge_sse2): push rbp mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 + SHADOW_ARGS_TO_STACK 3 SAVE_XMM 7 GET_GOT rbx push rsi @@ -1394,13 +1387,8 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): mov rsi, arg(0) ;src_ptr movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? - mov rdx, arg(2) ;flimit ; get flimit + mov rdx, arg(2) ;blimit movdqa xmm3, XMMWORD PTR [rdx] - mov rdx, arg(3) ;limit - movdqa xmm7, XMMWORD PTR [rdx] - - paddb xmm3, xmm3 ; flimit*2 (less than 255) - paddb xmm3, xmm7 ; flimit * 2 + limit (less than 255) mov rdi, rsi ; rdi points to row +1 for indirect addressing add rdi, rax @@ -1428,7 +1416,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): paddusb xmm5, xmm5 ; abs(p0-q0)*2 paddusb xmm5, xmm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit + psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit pxor xmm3, xmm3 pcmpeqb xmm5, xmm3 @@ -1493,16 +1481,13 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): ;( ; unsigned char *src_ptr, ; int src_pixel_step, -; const char *flimit, -; const char *limit, -; const char *thresh, -; int count +; const char *blimit, ;) global sym(vp8_loop_filter_simple_vertical_edge_sse2) sym(vp8_loop_filter_simple_vertical_edge_sse2): push rbp ; save old base pointer value. mov rbp, rsp ; set new base pointer value. - SHADOW_ARGS_TO_STACK 6 + SHADOW_ARGS_TO_STACK 3 SAVE_XMM 7 GET_GOT rbx ; save callee-saved reg push rsi @@ -1607,14 +1592,10 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): paddusb xmm5, xmm5 ; abs(p0-q0)*2 paddusb xmm5, xmm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - mov rdx, arg(2) ;flimit + mov rdx, arg(2) ;blimit movdqa xmm7, XMMWORD PTR [rdx] - mov rdx, arg(3) ; get limit - movdqa xmm6, XMMWORD PTR [rdx] - paddb xmm7, xmm7 ; flimit*2 (less than 255) - paddb xmm7, xmm6 ; flimit * 2 + limit (less than 255) - psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit + psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit pxor xmm7, xmm7 pcmpeqb xmm5, xmm7 ; mm5 = mask diff --git a/vp8/common/x86/loopfilter_x86.c b/vp8/common/x86/loopfilter_x86.c index a52420c98..9360ac17c 100644 --- a/vp8/common/x86/loopfilter_x86.c +++ b/vp8/common/x86/loopfilter_x86.c @@ -9,30 +9,18 @@ */ -#include "vpx_ports/config.h" +#include "vpx_config.h" #include "vp8/common/loopfilter.h" -prototype_loopfilter(vp8_loop_filter_horizontal_edge_c); -prototype_loopfilter(vp8_loop_filter_vertical_edge_c); -prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_c); -prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c); -prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_c); -prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_c); - prototype_loopfilter(vp8_mbloop_filter_vertical_edge_mmx); prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_mmx); prototype_loopfilter(vp8_loop_filter_vertical_edge_mmx); prototype_loopfilter(vp8_loop_filter_horizontal_edge_mmx); -prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx); -prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_mmx); prototype_loopfilter(vp8_loop_filter_vertical_edge_sse2); prototype_loopfilter(vp8_loop_filter_horizontal_edge_sse2); prototype_loopfilter(vp8_mbloop_filter_vertical_edge_sse2); prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_sse2); -prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2); -prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2); -prototype_loopfilter(vp8_fast_loop_filter_vertical_edges_sse2); extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2; extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2; @@ -44,23 +32,13 @@ extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2; void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { - vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); + vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); if (u_ptr) - vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1); + vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); if (v_ptr) - vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1); -} - - -void vp8_loop_filter_mbhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - (void) u_ptr; - (void) v_ptr; - (void) uv_stride; - vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); + vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } @@ -68,23 +46,13 @@ void vp8_loop_filter_mbhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { - vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); + vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); if (u_ptr) - vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1); + vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); if (v_ptr) - vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1); -} - - -void vp8_loop_filter_mbvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - (void) u_ptr; - (void) v_ptr; - (void) uv_stride; - vp8_loop_filter_simple_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); + vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } @@ -92,27 +60,23 @@ void vp8_loop_filter_mbvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { - vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); - vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); - vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); + vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); if (u_ptr) - vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1); + vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); if (v_ptr) - vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1); + vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); } -void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) +void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) { - (void) u_ptr; - (void) v_ptr; - (void) uv_stride; - vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); - vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); - vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); + vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, blimit); + vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, blimit); + vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, blimit); } @@ -120,27 +84,23 @@ void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { - vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); - vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); - vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); + vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); if (u_ptr) - vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1); + vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); if (v_ptr) - vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1); + vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); } -void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) +void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) { - (void) u_ptr; - (void) v_ptr; - (void) uv_stride; - vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); - vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); - vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); + vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, blimit); + vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, blimit); + vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, blimit); } #endif @@ -150,20 +110,10 @@ void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { - vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); + vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); if (u_ptr) - vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr); -} - - -void vp8_loop_filter_mbhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - (void) u_ptr; - (void) v_ptr; - (void) uv_stride; - vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); + vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr); } @@ -171,20 +121,10 @@ void vp8_loop_filter_mbhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { - vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); + vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); if (u_ptr) - vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr); -} - - -void vp8_loop_filter_mbvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) -{ - (void) u_ptr; - (void) v_ptr; - (void) uv_stride; - vp8_loop_filter_simple_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2); + vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr); } @@ -192,24 +132,20 @@ void vp8_loop_filter_mbvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { - vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); - vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); - vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); + vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); if (u_ptr) - vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4 * uv_stride); + vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4 * uv_stride); } -void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) +void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) { - (void) u_ptr; - (void) v_ptr; - (void) uv_stride; - vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); - vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); - vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); + vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, blimit); + vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, blimit); + vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, blimit); } @@ -217,36 +153,20 @@ void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { - vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); - vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); - vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); + vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); + vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); if (u_ptr) - vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, v_ptr + 4); + vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4); } -void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, - int y_stride, int uv_stride, loop_filter_info *lfi) +void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) { - (void) u_ptr; - (void) v_ptr; - (void) uv_stride; - vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); - vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); - vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); + vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, blimit); + vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, blimit); + vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, blimit); } #endif - -#if 0 -void vp8_fast_loop_filter_vertical_edges_sse(unsigned char *y_ptr, - int y_stride, - loop_filter_info *lfi) -{ - - vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); - vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); - vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); -} -#endif diff --git a/vp8/common/x86/loopfilter_x86.h b/vp8/common/x86/loopfilter_x86.h index 80dbebc8d..1ed6c213f 100644 --- a/vp8/common/x86/loopfilter_x86.h +++ b/vp8/common/x86/loopfilter_x86.h @@ -24,10 +24,10 @@ extern prototype_loopfilter_block(vp8_loop_filter_mbv_mmx); extern prototype_loopfilter_block(vp8_loop_filter_bv_mmx); extern prototype_loopfilter_block(vp8_loop_filter_mbh_mmx); extern prototype_loopfilter_block(vp8_loop_filter_bh_mmx); -extern prototype_loopfilter_block(vp8_loop_filter_mbvs_mmx); -extern prototype_loopfilter_block(vp8_loop_filter_bvs_mmx); -extern prototype_loopfilter_block(vp8_loop_filter_mbhs_mmx); -extern prototype_loopfilter_block(vp8_loop_filter_bhs_mmx); +extern prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx); +extern prototype_simple_loopfilter(vp8_loop_filter_bvs_mmx); +extern prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_mmx); +extern prototype_simple_loopfilter(vp8_loop_filter_bhs_mmx); #if !CONFIG_RUNTIME_CPU_DETECT @@ -44,13 +44,13 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_mmx); #define vp8_lf_normal_b_h vp8_loop_filter_bh_mmx #undef vp8_lf_simple_mb_v -#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_mmx +#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_mmx #undef vp8_lf_simple_b_v #define vp8_lf_simple_b_v vp8_loop_filter_bvs_mmx #undef vp8_lf_simple_mb_h -#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_mmx +#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_mmx #undef vp8_lf_simple_b_h #define vp8_lf_simple_b_h vp8_loop_filter_bhs_mmx @@ -63,10 +63,10 @@ extern prototype_loopfilter_block(vp8_loop_filter_mbv_sse2); extern prototype_loopfilter_block(vp8_loop_filter_bv_sse2); extern prototype_loopfilter_block(vp8_loop_filter_mbh_sse2); extern prototype_loopfilter_block(vp8_loop_filter_bh_sse2); -extern prototype_loopfilter_block(vp8_loop_filter_mbvs_sse2); -extern prototype_loopfilter_block(vp8_loop_filter_bvs_sse2); -extern prototype_loopfilter_block(vp8_loop_filter_mbhs_sse2); -extern prototype_loopfilter_block(vp8_loop_filter_bhs_sse2); +extern prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2); +extern prototype_simple_loopfilter(vp8_loop_filter_bvs_sse2); +extern prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2); +extern prototype_simple_loopfilter(vp8_loop_filter_bhs_sse2); #if !CONFIG_RUNTIME_CPU_DETECT @@ -83,13 +83,13 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_sse2); #define vp8_lf_normal_b_h vp8_loop_filter_bh_sse2 #undef vp8_lf_simple_mb_v -#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_sse2 +#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_sse2 #undef vp8_lf_simple_b_v #define vp8_lf_simple_b_v vp8_loop_filter_bvs_sse2 #undef vp8_lf_simple_mb_h -#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_sse2 +#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_sse2 #undef vp8_lf_simple_b_h #define vp8_lf_simple_b_h vp8_loop_filter_bhs_sse2 diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c index 87374f3c6..33a984b79 100644 --- a/vp8/common/x86/x86_systemdependent.c +++ b/vp8/common/x86/x86_systemdependent.c @@ -9,7 +9,7 @@ */ -#include "vpx_ports/config.h" +#include "vpx_config.h" #include "vpx_ports/x86.h" #include "vp8/common/g_common.h" #include "vp8/common/subpixel.h" @@ -63,9 +63,9 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx) rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_mmx; rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_mmx; rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_mmx; - rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_mmx; + rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_mmx; rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_mmx; - rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_mmx; + rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_mmx; rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_mmx; #if CONFIG_POSTPROC @@ -101,9 +101,9 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx) rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_sse2; rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_sse2; rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_sse2; - rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_sse2; + rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_sse2; rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_sse2; - rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_sse2; + rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_sse2; rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_sse2; #if CONFIG_POSTPROC -- 2.40.0