; unsigned char *src,
; int src_stride,
; )
-global sym(vp8_intra_pred_uv_ho_mmx2)
-sym(vp8_intra_pred_uv_ho_mmx2):
+%macro vp8_intra_pred_uv_ho 1
+global sym(vp8_intra_pred_uv_ho_%1)
+sym(vp8_intra_pred_uv_ho_%1):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
push rsi
push rdi
+%ifidn %1, ssse3
+ push rbx
+%endif
; end prolog
; read from left and write out
+%ifidn %1, mmx2
mov edx, 4
+%endif
mov rsi, arg(2) ;src;
movsxd rax, dword ptr arg(3) ;src_stride;
mov rdi, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
+%ifidn %1, ssse3
+ lea rbx, [rax*3]
+ lea rdx, [rcx*3]
+ movdqa xmm2, [GLOBAL(dc_00001111)]
+%endif
dec rsi
-vp8_intra_pred_uv_ho_mmx2_loop:
+%ifidn %1, mmx2
+vp8_intra_pred_uv_ho_%1_loop:
movd mm0, [rsi]
movd mm1, [rsi+rax]
punpcklbw mm0, mm0
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rcx*2]
dec edx
- jnz vp8_intra_pred_uv_ho_mmx2_loop
+ jnz vp8_intra_pred_uv_ho_%1_loop
+%else
+ movd xmm0, [rsi]
+ movd xmm3, [rsi+rax]
+ movd xmm1, [rsi+rax*2]
+ movd xmm4, [rsi+rbx]
+ punpcklbw xmm0, xmm3
+ punpcklbw xmm1, xmm4
+ pshufb xmm0, xmm2
+ pshufb xmm1, xmm2
+ movq [rdi ], xmm0
+ movhps [rdi+rcx], xmm0
+ movq [rdi+rcx*2], xmm1
+ movhps [rdi+rdx], xmm1
+ lea rsi, [rsi+rax*4]
+ lea rdi, [rdi+rcx*4]
+ movd xmm0, [rsi]
+ movd xmm3, [rsi+rax]
+ movd xmm1, [rsi+rax*2]
+ movd xmm4, [rsi+rbx]
+ punpcklbw xmm0, xmm3
+ punpcklbw xmm1, xmm4
+ pshufb xmm0, xmm2
+ pshufb xmm1, xmm2
+ movq [rdi ], xmm0
+ movhps [rdi+rcx], xmm0
+ movq [rdi+rcx*2], xmm1
+ movhps [rdi+rdx], xmm1
+%endif
; begin epilog
+%ifidn %1, ssse3
+ pop rbx
+%endif
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
+%endmacro
+
+vp8_intra_pred_uv_ho mmx2
+vp8_intra_pred_uv_ho ssse3
SECTION_RODATA
dc_128:
align 16
dc_1024:
times 8 dw 0x400
+align 16
+dc_00001111:
+ times 8 db 0
+ times 8 db 1
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dcleft_mmx2);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dc128_mmx);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ho_mmx2);
+extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ho_ssse3);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ve_mmx);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_tm_sse2);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_tm_ssse3);
unsigned char *dst_u,
unsigned char *dst_v,
int dst_stride,
- build_intra_predictors_mbuv_fn_t tm_func)
+ build_intra_predictors_mbuv_fn_t tm_func,
+ build_intra_predictors_mbuv_fn_t ho_func)
{
int mode = x->mode_info_context->mbmi.uv_mode;
build_intra_predictors_mbuv_fn_t fn;
switch (mode) {
case V_PRED: fn = vp8_intra_pred_uv_ve_mmx; break;
- case H_PRED: fn = vp8_intra_pred_uv_ho_mmx2; break;
+ case H_PRED: fn = ho_func; break;
case TM_PRED: fn = tm_func; break;
case DC_PRED:
if (x->up_available) {
{
vp8_build_intra_predictors_mbuv_x86(x, &x->predictor[256],
&x->predictor[320], 8,
- vp8_intra_pred_uv_tm_sse2);
+ vp8_intra_pred_uv_tm_sse2,
+ vp8_intra_pred_uv_ho_mmx2);
}
void vp8_build_intra_predictors_mbuv_ssse3(MACROBLOCKD *x)
{
vp8_build_intra_predictors_mbuv_x86(x, &x->predictor[256],
&x->predictor[320], 8,
- vp8_intra_pred_uv_tm_ssse3);
+ vp8_intra_pred_uv_tm_ssse3,
+ vp8_intra_pred_uv_ho_ssse3);
}
void vp8_build_intra_predictors_mbuv_s_sse2(MACROBLOCKD *x)
{
vp8_build_intra_predictors_mbuv_x86(x, x->dst.u_buffer,
x->dst.v_buffer, x->dst.uv_stride,
- vp8_intra_pred_uv_tm_sse2);
+ vp8_intra_pred_uv_tm_sse2,
+ vp8_intra_pred_uv_ho_mmx2);
}
void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x)
{
vp8_build_intra_predictors_mbuv_x86(x, x->dst.u_buffer,
x->dst.v_buffer, x->dst.uv_stride,
- vp8_intra_pred_uv_tm_ssse3);
+ vp8_intra_pred_uv_tm_ssse3,
+ vp8_intra_pred_uv_ho_ssse3);
}