From: Loren Merritt Date: Fri, 10 Feb 2006 21:58:43 +0000 (+0000) Subject: amd64 mmx for some intra pred functions X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=681b394485671f977a1a19d2279ace4c22eb0177;p=libx264 amd64 mmx for some intra pred functions git-svn-id: svn://svn.videolan.org/x264/trunk@429 df754926-b1dd-0310-bc7b-ec298dee348c --- diff --git a/Makefile b/Makefile index deefe36f..7580617f 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ endif # MMX/SSE optims ifeq ($(ARCH),X86_64) -SRCS += common/i386/mc-c.c common/i386/dct-c.c common/amd64/predict.c +SRCS += common/i386/mc-c.c common/i386/dct-c.c common/i386/predict.c ASMSRC = common/amd64/dct-a.asm common/amd64/cpu-a.asm \ common/amd64/pixel-a.asm common/amd64/mc-a.asm \ common/amd64/mc-a2.asm common/amd64/predict-a.asm \ diff --git a/common/amd64/predict-a.asm b/common/amd64/predict-a.asm index a6bec9bd..0bb7fb07 100644 --- a/common/amd64/predict-a.asm +++ b/common/amd64/predict-a.asm @@ -3,6 +3,8 @@ ;***************************************************************************** ;* Copyright (C) 2005 x264 project ;* +;* Authors: Loren Merritt +;* ;* This program is free software; you can redistribute it and/or modify ;* it under the terms of the GNU General Public License as published by ;* the Free Software Foundation; either version 2 of the License, or @@ -26,23 +28,139 @@ BITS 64 %include "amd64inc.asm" -;============================================================================= -; Macros -;============================================================================= - %macro SAVE_0_1 1 movq [%1] , mm0 movq [%1 + 8] , mm1 %endmacro +%macro SAVE_0_0 1 + movq [%1] , mm0 + movq [%1 + 8] , mm0 +%endmacro + + +SECTION .rodata align=16 + +ALIGN 8 +pw_2: times 4 dw 2 +pw_8: times 4 dw 8 +pb_1: times 8 db 1 +pw_3210: + dw 0 + dw 1 + dw 2 + dw 3 + ;============================================================================= ; Code ;============================================================================= SECTION .text +cglobal predict_8x8_v_mmxext +cglobal predict_8x8_dc_core_mmxext cglobal predict_8x8c_v_mmx +cglobal predict_8x8c_dc_core_mmxext +cglobal predict_8x8c_p_core_mmx +cglobal predict_16x16_p_core_mmx cglobal predict_16x16_v_mmx +cglobal predict_16x16_dc_core_mmxext +cglobal predict_16x16_dc_top_mmxext + + + +%macro PRED8x8_LOWPASS 2 + movq mm3, mm1 + pavgb mm1, mm2 + pxor mm2, mm3 + movq %1 , %2 + pand mm2, [pb_1 GLOBAL] + psubusb mm1, mm2 + pavgb %1 , mm1 ; %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2 +%endmacro + +%macro PRED8x8_LOAD_TOP 0 + sub parm1q, parm2q + + and parm3d, 12 + movq mm1, [parm1q-1] + movq mm2, [parm1q+1] + + cmp parm3d, byte 8 + jge .have_topleft + mov al, [parm1q] + mov ah, [parm1q] + pinsrw mm1, eax, 0 +.have_topleft: + + and parm3d, byte 4 + jne .have_topright + mov al, [parm1q+7] + mov ah, [parm1q+7] + pinsrw mm2, eax, 3 +.have_topright: + + PRED8x8_LOWPASS mm0, [parm1q] +%endmacro + +;----------------------------------------------------------------------------- +; +; void predict_8x8_v_mmxext( uint8_t *src, int i_stride, int i_neighbors ) +; +;----------------------------------------------------------------------------- + +ALIGN 16 +predict_8x8_v_mmxext: + PRED8x8_LOAD_TOP + + lea rax, [parm2q + 2*parm2q] + movq [parm1q + parm2q], mm0 ; 0 + movq [parm1q + 2*parm2q], mm0 ; 1 + movq [parm1q + 4*parm2q], mm0 ; 3 + movq [parm1q + 8*parm2q], mm0 ; 7 + add parm1q, rax + movq [parm1q], mm0 ; 2 + movq [parm1q + 2*parm2q], mm0 ; 4 + movq [parm1q + rax ], mm0 ; 5 + movq 
[parm1q + 4*parm2q], mm0 ; 6 + + ret + +;----------------------------------------------------------------------------- +; +; void predict_8x8_dc_core_mmxext( uint8_t *src, int i_stride, int i_neighbors, uint8_t *pix_left ); +; +;----------------------------------------------------------------------------- + +ALIGN 16 +predict_8x8_dc_core_mmxext: + movq mm1, [parm4q-1] + movq mm2, [parm4q+1] + PRED8x8_LOWPASS mm4, [parm4q] + + PRED8x8_LOAD_TOP + + pxor mm1, mm1 + psadbw mm0, mm1 + psadbw mm4, mm1 + paddw mm0, [pw_8 GLOBAL] + paddw mm0, mm4 + psrlw mm0, 4 + pshufw mm0, mm0, 0 + packuswb mm0, mm0 + + lea rax, [parm2q + 2*parm2q] + movq [parm1q + parm2q], mm0 ; 0 + movq [parm1q + 2*parm2q], mm0 ; 1 + movq [parm1q + 4*parm2q], mm0 ; 3 + movq [parm1q + 8*parm2q], mm0 ; 7 + add parm1q, rax + movq [parm1q], mm0 ; 2 + movq [parm1q + 2*parm2q], mm0 ; 4 + movq [parm1q + rax ], mm0 ; 5 + movq [parm1q + 4*parm2q], mm0 ; 6 + + ret ;----------------------------------------------------------------------------- ; @@ -68,6 +186,154 @@ predict_8x8c_v_mmx : ret +;----------------------------------------------------------------------------- +; +; void predict_8x8c_dc_core_mmxext( uint8_t *src, int i_stride, int s2, int s3 ) +; +;----------------------------------------------------------------------------- + +ALIGN 16 +predict_8x8c_dc_core_mmxext: + sub parm1q, parm2q + lea rax, [parm2q + 2*parm2q] + + movq mm0, [parm1q] + pxor mm1, mm1 + pxor mm2, mm2 + punpckhbw mm1, mm0 + punpcklbw mm0, mm2 + psadbw mm1, mm2 ; s1 + psadbw mm0, mm2 ; s0 + + movd mm4, parm3d + movd mm5, parm4d + paddw mm0, mm4 + pshufw mm2, mm5, 0 + psrlw mm0, 3 + paddw mm1, [pw_2 GLOBAL] + movq mm3, mm2 + pshufw mm1, mm1, 0 + pshufw mm0, mm0, 0 ; dc0 (w) + paddw mm3, mm1 + psrlw mm3, 3 ; dc3 (w) + psrlw mm2, 2 ; dc2 (w) + psrlw mm1, 2 ; dc1 (w) + + packuswb mm0, mm1 ; dc0,dc1 (b) + packuswb mm2, mm3 ; dc2,dc3 (b) + + movq [parm1q + parm2q], mm0 ; 0 + movq [parm1q + 2*parm2q], mm0 ; 1 + movq [parm1q + rax ], mm0 ; 2 + movq [parm1q + 4*parm2q], mm0 ; 3 + lea parm1q, [parm1q + 4*parm2q] + movq [parm1q + parm2q], mm2 ; 4 + movq [parm1q + 2*parm2q], mm2 ; 5 + movq [parm1q + rax ], mm2 ; 6 + movq [parm1q + 4*parm2q], mm2 ; 7 + + ret + +;----------------------------------------------------------------------------- +; +; void predict_8x8c_p_core_mmx( uint8_t *src, int i_stride, int i00, int b, int c ) +; +;----------------------------------------------------------------------------- + +ALIGN 16 +predict_8x8c_p_core_mmx: + movd mm0, parm3d + movd mm2, parm4d + movd mm4, parm5d + pshufw mm0, mm0, 0 + pshufw mm2, mm2, 0 + pshufw mm4, mm4, 0 + movq mm1, mm2 + pmullw mm2, [pw_3210 GLOBAL] + psllw mm1, 2 + paddsw mm0, mm2 ; mm0 = {i+0*b, i+1*b, i+2*b, i+3*b} + paddsw mm1, mm0 ; mm1 = {i+4*b, i+5*b, i+6*b, i+7*b} + pxor mm5, mm5 + + mov eax, 8 +ALIGN 4 +.loop: + movq mm6, mm0 + movq mm7, mm1 + psraw mm6, 5 + psraw mm7, 5 + pmaxsw mm6, mm5 + pmaxsw mm7, mm5 + packuswb mm6, mm7 + movq [parm1q], mm6 + + paddsw mm0, mm4 + paddsw mm1, mm4 + add parm1q, parm2q + dec eax + jg .loop + + nop + ret + +;----------------------------------------------------------------------------- +; +; void predict_16x16_p_core_mmx( uint8_t *src, int i_stride, int i00, int b, int c ) +; +;----------------------------------------------------------------------------- + +ALIGN 16 +predict_16x16_p_core_mmx: + movd mm0, parm3d + movd mm2, parm4d + movd mm4, parm5d + pshufw mm0, mm0, 0 + pshufw mm2, mm2, 0 + pshufw mm4, mm4, 0 + movq mm5, mm2 + movq mm1, mm2 + pmullw mm5, [pw_3210 GLOBAL] + 
psllw mm2, 3 + psllw mm1, 2 + movq mm3, mm2 + paddsw mm0, mm5 ; mm0 = {i+ 0*b, i+ 1*b, i+ 2*b, i+ 3*b} + paddsw mm1, mm0 ; mm1 = {i+ 4*b, i+ 5*b, i+ 6*b, i+ 7*b} + paddsw mm2, mm0 ; mm2 = {i+ 8*b, i+ 9*b, i+10*b, i+11*b} + paddsw mm3, mm1 ; mm3 = {i+12*b, i+13*b, i+14*b, i+15*b} + pxor mm5, mm5 + + mov eax, 16 +ALIGN 4 +.loop: + movq mm6, mm0 + movq mm7, mm1 + psraw mm6, 5 + psraw mm7, 5 + pmaxsw mm6, mm5 + pmaxsw mm7, mm5 + packuswb mm6, mm7 + movq [parm1q], mm6 + + movq mm6, mm2 + movq mm7, mm3 + psraw mm6, 5 + psraw mm7, 5 + pmaxsw mm6, mm5 + pmaxsw mm7, mm5 + packuswb mm6, mm7 + movq [parm1q+8], mm6 + + paddsw mm0, mm4 + paddsw mm1, mm4 + paddsw mm2, mm4 + paddsw mm3, mm4 + add parm1q, parm2q + dec eax + jg .loop + + nop + ret + ;----------------------------------------------------------------------------- ; ; void predict_16x16_v_mmx( uint8_t *src, int i_stride ) @@ -103,3 +369,48 @@ predict_16x16_v_mmx : SAVE_0_1 (parm1q + rax) ; 15 ret + +;----------------------------------------------------------------------------- +; +; void predict_16x16_dc_core_mmxext( uint8_t *src, int i_stride, int i_dc_left ) +; +;----------------------------------------------------------------------------- + +%macro PRED16x16_DC 2 + sub parm1q, parm2q ; parm1q <-- line -1 + + pxor mm0, mm0 + pxor mm1, mm1 + psadbw mm0, [parm1q] + psadbw mm1, [parm1q + 8] + paddusw mm0, mm1 + paddusw mm0, %1 + psrlw mm0, %2 ; dc + pshufw mm0, mm0, 0 + lea r8, [parm2q + 2*parm2q] ; eax <-- 3* stride + packuswb mm0, mm0 ; dc in bytes + + mov eax, 4 +ALIGN 4 +.loop: + SAVE_0_0 (parm1q + parm2q) ; 0 + SAVE_0_0 (parm1q + 2 * parm2q) ; 1 + SAVE_0_0 (parm1q + r8 ) ; 2 + SAVE_0_0 (parm1q + 4 * parm2q) ; 3 + dec eax + lea parm1q, [parm1q + 4 * parm2q] + jg .loop + nop +%endmacro + +ALIGN 16 +predict_16x16_dc_core_mmxext: + movd mm2, parm3d + PRED16x16_DC mm2, 5 + ret + +ALIGN 16 +predict_16x16_dc_top_mmxext: + PRED16x16_DC [pw_8 GLOBAL], 4 + ret + diff --git a/common/amd64/predict.c b/common/amd64/predict.c deleted file mode 100644 index 5384134d..00000000 --- a/common/amd64/predict.c +++ /dev/null @@ -1,175 +0,0 @@ -/***************************************************************************** - * predict.c: h264 encoder - ***************************************************************************** - * Copyright (C) 2006 x264 project - * - * Authors: Loren Merritt - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110 USA - *****************************************************************************/ - -#ifdef HAVE_STDINT_H -#include -#else -#include -#endif - -#include "common/predict.h" -#include "common/i386/predict.h" - -extern void predict_16x16_v_mmx( uint8_t *src, int i_stride ); -extern void predict_8x8c_v_mmx( uint8_t *src, int i_stride ); - -/**************************************************************************** - * 16x16 prediction for intra luma block - ****************************************************************************/ - -#define PREDICT_16x16_DC(v) \ - for( i = 0; i < 16; i++ )\ - {\ - uint64_t *p = (uint64_t*)src;\ - *p++ = v;\ - *p++ = v;\ - src += i_stride;\ - } - -static void predict_16x16_dc( uint8_t *src, int i_stride ) -{ - uint32_t s = 0; - uint64_t dc; - int i; - - /* calculate DC value */ - for( i = 0; i < 16; i++ ) - { - s += src[-1 + i * i_stride]; - s += src[i - i_stride]; - } - dc = (( s + 16 ) >> 5) * 0x0101010101010101ULL; - - PREDICT_16x16_DC(dc); -} -static void predict_16x16_dc_left( uint8_t *src, int i_stride ) -{ - uint32_t s = 0; - uint64_t dc; - int i; - - for( i = 0; i < 16; i++ ) - { - s += src[-1 + i * i_stride]; - } - dc = (( s + 8 ) >> 4) * 0x0101010101010101ULL; - - PREDICT_16x16_DC(dc); -} -static void predict_16x16_h( uint8_t *src, int i_stride ) -{ - int i; - for( i = 0; i < 16; i++ ) - { - const uint64_t v = 0x0101010101010101ULL * src[-1]; - uint64_t *p = (uint64_t*)src; - *p++ = v; - *p++ = v; - src += i_stride; - } -} - - -/**************************************************************************** - * 8x8 prediction for intra chroma block - ****************************************************************************/ - -static void predict_8x8c_dc_left( uint8_t *src, int i_stride ) -{ - int y; - uint32_t s0 = 0, s1 = 0; - uint64_t dc0, dc1; - - for( y = 0; y < 4; y++ ) - { - s0 += src[y * i_stride - 1]; - s1 += src[(y+4) * i_stride - 1]; - } - dc0 = (( s0 + 2 ) >> 2)*0x0101010101010101ULL; - dc1 = (( s1 + 2 ) >> 2)*0x0101010101010101ULL; - - for( y = 0; y < 4; y++ ) - { - *(uint64_t*)src = dc0; - src += i_stride; - } - for( y = 0; y < 4; y++ ) - { - *(uint64_t*)src = dc1; - src += i_stride; - } - -} -static void predict_8x8c_dc_top( uint8_t *src, int i_stride ) -{ - int y, x; - uint32_t s0 = 0, s1 = 0; - uint64_t dc; - - for( x = 0; x < 4; x++ ) - { - s0 += src[x - i_stride]; - s1 += src[x + 4 - i_stride]; - } - dc = (( s0 + 2 ) >> 2)*0x01010101 - + (( s1 + 2 ) >> 2)*0x0101010100000000ULL; - - for( y = 0; y < 8; y++ ) - { - *(uint64_t*)src = dc; - src += i_stride; - } -} -static void predict_8x8c_h( uint8_t *src, int i_stride ) -{ - int i; - for( i = 0; i < 8; i++ ) - { - *(uint64_t*)src = 0x0101010101010101ULL * src[-1]; - src += i_stride; - } -} - - -/**************************************************************************** - * Exported functions: - ****************************************************************************/ -void x264_predict_16x16_init_mmxext( x264_predict_t pf[7] ) -{ - pf[I_PRED_16x16_V ] = predict_16x16_v_mmx; - pf[I_PRED_16x16_H ] = predict_16x16_h; - pf[I_PRED_16x16_DC] = predict_16x16_dc; - pf[I_PRED_16x16_DC_LEFT]= predict_16x16_dc_left; -} - -void x264_predict_8x8c_init_mmxext( x264_predict_t pf[7] ) -{ - pf[I_PRED_CHROMA_V ] = predict_8x8c_v_mmx; - pf[I_PRED_CHROMA_H ] = predict_8x8c_h; - 
pf[I_PRED_CHROMA_DC_LEFT]= predict_8x8c_dc_left; - pf[I_PRED_CHROMA_DC_TOP ]= predict_8x8c_dc_top; -} - -void x264_predict_8x8_init_mmxext( x264_predict8x8_t pf[12] ) -{ -} - diff --git a/common/i386/predict-a.asm b/common/i386/predict-a.asm index 53a16275..beee70bb 100644 --- a/common/i386/predict-a.asm +++ b/common/i386/predict-a.asm @@ -91,7 +91,7 @@ cglobal predict_16x16_dc_top_mmxext jge .have_topleft mov al, [edx] mov ah, [edx] - pinsrw mm1, ax, 0 + pinsrw mm1, eax, 0 mov eax, [picesp + 12] .have_topleft: @@ -99,7 +99,7 @@ cglobal predict_16x16_dc_top_mmxext jne .have_topright mov al, [edx+7] mov ah, [edx+7] - pinsrw mm2, ax, 3 + pinsrw mm2, eax, 3 .have_topright: PRED8x8_LOWPASS mm0, [edx] @@ -133,7 +133,7 @@ predict_8x8_v_mmxext: ;----------------------------------------------------------------------------- ; -; void predict_8x8_dc_core_mmxext( uint8_t *src, int i_stride, int i_neighbors, int i_dc_left ); +; void predict_8x8_dc_core_mmxext( uint8_t *src, int i_stride, int i_neighbors, uint8_t *pix_left ); ; ;----------------------------------------------------------------------------- @@ -264,13 +264,9 @@ predict_8x8c_p_core_mmx: mov edx, [picesp + 4] mov ecx, [picesp + 8] - - movd mm0, [picesp +12] - movd mm2, [picesp +16] - movd mm4, [picesp +20] - pshufw mm0, mm0, 0 - pshufw mm2, mm2, 0 - pshufw mm4, mm4, 0 + pshufw mm0, [picesp +12], 0 + pshufw mm2, [picesp +16], 0 + pshufw mm4, [picesp +20], 0 movq mm1, mm2 pmullw mm2, [pw_3210 GLOBAL] psllw mm1, 2 @@ -314,13 +310,9 @@ predict_16x16_p_core_mmx: mov edx, [picesp + 4] mov ecx, [picesp + 8] - - movd mm0, [picesp +12] - movd mm2, [picesp +16] - movd mm4, [picesp +20] - pshufw mm0, mm0, 0 ; FIXME shuf these directly from memory - pshufw mm2, mm2, 0 ; if there is stack alignment? - pshufw mm4, mm4, 0 + pshufw mm0, [picesp +12], 0 + pshufw mm2, [picesp +16], 0 + pshufw mm4, [picesp +20], 0 movq mm5, mm2 movq mm1, mm2 pmullw mm5, [pw_3210 GLOBAL] diff --git a/common/i386/predict.c b/common/i386/predict.c index b6bc9c94..ed067734 100644 --- a/common/i386/predict.c +++ b/common/i386/predict.c @@ -21,14 +21,8 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
*****************************************************************************/ -#ifdef HAVE_STDINT_H -#include -#else -#include -#endif - -#include "common/clip1.h" #include "common/common.h" +#include "common/clip1.h" #include "predict.h" extern void predict_16x16_v_mmx( uint8_t *src, int i_stride ); @@ -132,27 +126,130 @@ static void predict_8x8_dc( uint8_t *src, int i_stride, int i_neighbor ) predict_8x8_dc_core_mmxext( src, i_stride, i_neighbor, l+1 ); } +#ifdef ARCH_X86_64 +static void predict_16x16_h( uint8_t *src, int i_stride ) +{ + int y; + for( y = 0; y < 16; y++ ) + { + const uint64_t v = 0x0101010101010101ULL * src[-1]; + uint64_t *p = (uint64_t*)src; + p[0] = p[1] = v; + src += i_stride; + } +} + +static void predict_8x8c_h( uint8_t *src, int i_stride ) +{ + int y; + for( y = 0; y < 8; y++ ) + { + *(uint64_t*)src = 0x0101010101010101ULL * src[-1]; + src += i_stride; + } +} + +static void predict_16x16_dc_left( uint8_t *src, int i_stride ) +{ + uint32_t s = 0; + uint64_t dc; + int y; + + for( y = 0; y < 16; y++ ) + { + s += src[-1 + y * i_stride]; + } + dc = (( s + 8 ) >> 4) * 0x0101010101010101ULL; + + for( y = 0; y < 16; y++ ) + { + uint64_t *p = (uint64_t*)src; + p[0] = p[1] = dc; + src += i_stride; + } +} + +static void predict_8x8c_dc_left( uint8_t *src, int i_stride ) +{ + int y; + uint32_t s0 = 0, s1 = 0; + uint64_t dc0, dc1; + + for( y = 0; y < 4; y++ ) + { + s0 += src[y * i_stride - 1]; + s1 += src[(y+4) * i_stride - 1]; + } + dc0 = (( s0 + 2 ) >> 2) * 0x0101010101010101ULL; + dc1 = (( s1 + 2 ) >> 2) * 0x0101010101010101ULL; + + for( y = 0; y < 4; y++ ) + { + *(uint64_t*)src = dc0; + src += i_stride; + } + for( y = 0; y < 4; y++ ) + { + *(uint64_t*)src = dc1; + src += i_stride; + } + +} + +static void predict_8x8c_dc_top( uint8_t *src, int i_stride ) +{ + int y, x; + uint32_t s0 = 0, s1 = 0; + uint64_t dc; + + for( x = 0; x < 4; x++ ) + { + s0 += src[x - i_stride]; + s1 += src[x + 4 - i_stride]; + } + dc = (( s0 + 2 ) >> 2) * 0x01010101 + + (( s1 + 2 ) >> 2) * 0x0101010100000000ULL; + + for( y = 0; y < 8; y++ ) + { + *(uint64_t*)src = dc; + src += i_stride; + } +} +#endif + /**************************************************************************** * Exported functions: ****************************************************************************/ void x264_predict_16x16_init_mmxext( x264_predict_t pf[7] ) { - pf[I_PRED_16x16_V] = predict_16x16_v_mmx; - pf[I_PRED_16x16_DC] = predict_16x16_dc; - pf[I_PRED_16x16_DC_TOP] = predict_16x16_dc_top_mmxext; - pf[I_PRED_16x16_P] = predict_16x16_p; + pf[I_PRED_16x16_V] = predict_16x16_v_mmx; + pf[I_PRED_16x16_DC] = predict_16x16_dc; + pf[I_PRED_16x16_DC_TOP] = predict_16x16_dc_top_mmxext; + pf[I_PRED_16x16_P] = predict_16x16_p; + +#ifdef ARCH_X86_64 + pf[I_PRED_16x16_H] = predict_16x16_h; + pf[I_PRED_16x16_DC_LEFT] = predict_16x16_dc_left; +#endif } void x264_predict_8x8c_init_mmxext( x264_predict_t pf[7] ) { - pf[I_PRED_CHROMA_V] = predict_8x8c_v_mmx; - pf[I_PRED_CHROMA_P] = predict_8x8c_p; - pf[I_PRED_CHROMA_DC] = predict_8x8c_dc; + pf[I_PRED_CHROMA_V] = predict_8x8c_v_mmx; + pf[I_PRED_CHROMA_P] = predict_8x8c_p; + pf[I_PRED_CHROMA_DC] = predict_8x8c_dc; + +#ifdef ARCH_X86_64 + pf[I_PRED_CHROMA_H] = predict_8x8c_h; + pf[I_PRED_CHROMA_DC_LEFT] = predict_8x8c_dc_left; + pf[I_PRED_CHROMA_DC_TOP] = predict_8x8c_dc_top; +#endif } void x264_predict_8x8_init_mmxext( x264_predict8x8_t pf[12] ) { - pf[I_PRED_8x8_V] = predict_8x8_v_mmxext; + pf[I_PRED_8x8_V] = predict_8x8_v_mmxext; pf[I_PRED_8x8_DC] = predict_8x8_dc; }
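
The PRED8x8_LOWPASS macro added above is the usual H.264 3-tap smoothing filter; its own comment gives the formula (t[n-1] + 2*t[n] + t[n+1] + 2) >> 2. A rough C sketch of the same computation, for readers not fluent in MMX (the function name and the three-pointer signature are illustrative only, not part of the commit; the asm reads the three taps from src-1, src and src+1):

#include <stdint.h>

/* Plain C equivalent of PRED8x8_LOWPASS: each output byte is the 3-tap
 * filtered value (t[n-1] + 2*t[n] + t[n+1] + 2) >> 2.  The MMX code gets
 * the same result without widening to 16 bits: pavgb rounds the average
 * of the two outer taps up, ((l ^ r) & 1) subtracts that rounding bit
 * back out, and a second pavgb against the centre tap supplies the
 * remaining rounding and shift. */
static void pred8x8_lowpass_ref( uint8_t *dst, const uint8_t *left,
                                 const uint8_t *center, const uint8_t *right )
{
    int i;
    for( i = 0; i < 8; i++ )
        dst[i] = ( left[i] + 2*center[i] + right[i] + 2 ) >> 2;
}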
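
The new *_dc_core_mmxext functions only finish a DC computation that the C wrapper starts. A sketch of the 16x16 case, assuming the wrapper passes the sum of the 16 left neighbours plus the rounding constant 16 in i_dc_left (that wrapper is not part of this hunk, so the assumption is based on the paddusw/psrlw 5 sequence in the asm):

#include <stdint.h>

/* Rough C equivalent of predict_16x16_dc_core_mmxext: add the 16 top
 * neighbours (psadbw in the MMX code) to the precomputed left sum, divide
 * by 32 and splat the resulting DC byte over the whole 16x16 block. */
static void predict_16x16_dc_core_ref( uint8_t *src, int i_stride, int i_dc_left )
{
    uint32_t s = i_dc_left;
    uint64_t dc;
    int x, y;

    for( x = 0; x < 16; x++ )
        s += src[x - i_stride];
    dc = (uint64_t)( s >> 5 ) * 0x0101010101010101ULL;

    for( y = 0; y < 16; y++ )
    {
        uint64_t *p = (uint64_t*)src;
        p[0] = p[1] = dc;
        src += i_stride;
    }
}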
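
Likewise, predict_16x16_p_core_mmx and predict_8x8c_p_core_mmx evaluate the plane predictor after the caller has reduced it to three integers. A C sketch of the 16x16 core, glossing over the saturating-arithmetic corner cases of the MMX version, and assuming i00 is roughly 32 times the predicted top-left sample (rounding term folded in), b the horizontal gradient and c the vertical gradient:

#include <stdint.h>

static inline uint8_t clip_uint8( int v )
{
    return v < 0 ? 0 : v > 255 ? 255 : v;
}

/* Rough C equivalent of predict_16x16_p_core_mmx: pred[y][x] is
 * (i00 + x*b + y*c) >> 5, clipped to [0,255].  The MMX code keeps four
 * word vectors of running sums (x, x+4, x+8, x+12) and adds c once per
 * row instead of recomputing the plane from scratch. */
static void predict_16x16_p_core_ref( uint8_t *src, int i_stride,
                                      int i00, int b, int c )
{
    int x, y;
    for( y = 0; y < 16; y++ )
    {
        int v = i00;
        for( x = 0; x < 16; x++ )
        {
            src[x] = clip_uint8( v >> 5 );
            v += b;
        }
        src += i_stride;
        i00 += c;
    }
}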