movq [r0+24], m3
RET
+%ifdef X264_HIGH_BIT_DEPTH
+;-----------------------------------------------------------------------------
+; void sub4x4_dct( int32_t dct[4][4], uint16_t *pix1, uint16_t *pix2 )
+;-----------------------------------------------------------------------------
+cglobal sub4x4_dct_mmx, 3,3
+.skip_prologue:
+ LOAD_DIFF m0, m4, none, [r1+0*FENC_STRIDE], [r2+0*FDEC_STRIDE]
+ LOAD_DIFF m3, m4, none, [r1+6*FENC_STRIDE], [r2+6*FDEC_STRIDE]
+ LOAD_DIFF m1, m4, none, [r1+2*FENC_STRIDE], [r2+2*FDEC_STRIDE]
+ LOAD_DIFF m2, m4, none, [r1+4*FENC_STRIDE], [r2+4*FDEC_STRIDE]
+ DCT4_1D 0,1,2,3,4
+ TRANSPOSE4x4W 0,1,2,3,4
+ DCT4_1D 0,1,2,3,4
+ STORE_DIFF m0, m4, m5, [r0+ 0], [r0+ 8]
+ STORE_DIFF m1, m4, m5, [r0+16], [r0+24]
+ STORE_DIFF m2, m4, m5, [r0+32], [r0+40]
+ STORE_DIFF m3, m4, m5, [r0+48], [r0+56]
+ RET
+%endif ; X264_HIGH_BIT_DEPTH
+
+%ifndef X264_HIGH_BIT_DEPTH
%macro SUB_DCT4 1
;-----------------------------------------------------------------------------
; void sub4x4_dct( int16_t dct[4][4], uint8_t *pix1, uint8_t *pix2 )
SUB_DCT4 mmx
SUB_DCT4 ssse3
+%endif ; !X264_HIGH_BIT_DEPTH
+%ifndef X264_HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void add4x4_idct( uint8_t *p_dst, int16_t dct[4][4] )
;-----------------------------------------------------------------------------
movd [r0+FDEC_STRIDE*2], m0
pextrd [r0+FDEC_STRIDE*3], m0, 1
RET
+%endif ; !X264_HIGH_BIT_DEPTH
INIT_MMX
;-----------------------------------------------------------------------------
; void sub8x8_dct( int16_t dct[4][4][4], uint8_t *pix1, uint8_t *pix2 )
;-----------------------------------------------------------------------------
%macro SUB_NxN_DCT 6
-cglobal %1, 3,3,11
+cglobal %1, 3,3,11*(mmsize/16)
+%ifndef X264_HIGH_BIT_DEPTH
%if mmsize == 8
pxor m7, m7
%else
add r2, 4*FDEC_STRIDE
mova m7, [hsub_mul]
%endif
+%endif ; !X264_HIGH_BIT_DEPTH
.skip_prologue:
%ifdef WIN64
sub rsp, 8
; void add8x8_idct( uint8_t *pix, int16_t dct[4][4][4] )
;-----------------------------------------------------------------------------
%macro ADD_NxN_IDCT 6-7
-cglobal %1, 2,2,11
+cglobal %1, 2,2,11*(mmsize/16)
pxor m7, m7
%if mmsize==16
add r0, 4*FDEC_STRIDE
%endif
%endmacro
+%ifdef X264_HIGH_BIT_DEPTH
+INIT_MMX
+SUB_NxN_DCT sub8x8_dct_mmx, sub4x4_dct_mmx.skip_prologue, 64, 8, 0, 0
+SUB_NxN_DCT sub16x16_dct_mmx, sub8x8_dct_mmx.skip_prologue, 64, 16, 8, 8
+%else ; !X264_HIGH_BIT_DEPTH
%ifndef ARCH_X86_64
SUB_NxN_DCT sub8x8_dct_mmx, sub4x4_dct_mmx.skip_prologue, 32, 4, 0, 0
ADD_NxN_IDCT add8x8_idct_mmx, add4x4_idct_mmx.skip_prologue, 32, 4, 0, 0
cextern sub8x8_dct8_ssse3.skip_prologue
SUB_NxN_DCT sub16x16_dct8_ssse3, sub8x8_dct8_ssse3.skip_prologue, 128, 8, 0, 0
+%endif ; X264_HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
#ifndef X264_I386_DCT_H
#define X264_I386_DCT_H
-void x264_sub4x4_dct_mmx ( int16_t dct [16], uint8_t *pix1, uint8_t *pix2 );
-void x264_sub8x8_dct_mmx ( int16_t dct[ 4][16], uint8_t *pix1, uint8_t *pix2 );
-void x264_sub16x16_dct_mmx ( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 );
+void x264_sub4x4_dct_mmx ( dctcoef dct [16], pixel *pix1, pixel *pix2 );
+void x264_sub8x8_dct_mmx ( dctcoef dct[ 4][16], pixel *pix1, pixel *pix2 );
+void x264_sub16x16_dct_mmx ( dctcoef dct[16][16], pixel *pix1, pixel *pix2 );
void x264_sub8x8_dct_sse2 ( int16_t dct[ 4][16], uint8_t *pix1, uint8_t *pix2 );
void x264_sub16x16_dct_sse2 ( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 );
void x264_sub4x4_dct_ssse3 ( int16_t dct [16], uint8_t *pix1, uint8_t *pix2 );