#undef ZIG
#undef COPY4x4
+/* Split the 64 coefficients in src into four consecutive 16-entry lists in
+ * dst: list i (i = 0..3) receives src[i], src[i+4], src[i+8], ... so that
+ * dst[i*16+j] = src[i+j*4].  Used to shuffle 8x8 dct coeffs into 4x4 lists
+ * before CAVLC luma writing.
+ *
+ * dst: output, 64 int16_t written (4 lists of 16).
+ * src: input, 64 int16_t read.
+ *
+ * Plain C version; x264_zigzag_init installs it as the default
+ * interleave_8x8_cavlc handler. */
+static void zigzag_interleave_8x8_cavlc( int16_t *dst, int16_t *src )
+{
+ int i,j;
+ /* i selects the output list, j the position inside that list */
+ for( i=0; i<4; i++ )
+ for( j=0; j<16; j++ )
+ dst[i*16+j] = src[i+j*4];
+}
+
void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
{
if( b_interlaced )
pf->scan_4x4 = x264_zigzag_scan_4x4_frame_altivec;
#endif
}
+
+ pf->interleave_8x8_cavlc = zigzag_interleave_8x8_cavlc;
+#ifdef HAVE_MMX
+ if( cpu&X264_CPU_MMX )
+ pf->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_mmx;
+#endif
}
void (*scan_4x4)( int16_t level[16], int16_t dct[4][4] );
void (*sub_8x8)( int16_t level[64], const uint8_t *p_src, uint8_t *p_dst );
void (*sub_4x4)( int16_t level[16], const uint8_t *p_src, uint8_t *p_dst );
+ void (*interleave_8x8_cavlc)( int16_t *dst, int16_t *src );
} x264_zigzag_function_t;
movdqa [r0], xmm0
movdqa [r0+16], xmm1
RET
+
+INIT_MMX
+;-----------------------------------------------------------------------------
+; void x264_zigzag_interleave_8x8_cavlc_mmx( int16_t *dst, int16_t *src )
+;-----------------------------------------------------------------------------
+; MMX counterpart of the C zigzag_interleave_8x8_cavlc:
+;   dst[i*16+j] = src[i+j*4]   for i = 0..3, j = 0..15
+; r0 = dst, r1 = src (argument order of the C prototype), r2 = loop offset.
+; Each iteration reads 16 consecutive words (32 bytes) of src and writes
+; 4 words into each of the four 16-word output lists.
+cglobal x264_zigzag_interleave_8x8_cavlc_mmx, 2,3
+; r2 is the byte offset inside each output list; it steps 24, 16, 8, 0
+ mov r2d, 24
+.loop:
+; src is consumed 4x faster than each list is filled, hence the r2*4 scale
+ movq m0, [r1+r2*4+ 0]
+ movq m1, [r1+r2*4+ 8]
+ movq m2, [r1+r2*4+16]
+ movq m3, [r1+r2*4+24]
+; NOTE(review): TRANSPOSE4x4W is defined elsewhere; presumably it transposes
+; the 4x4 word matrix in m0..m3 using m4 as scratch, so that m<i> ends up
+; holding the words at src positions i, i+4, i+8, i+12 of this group
+ TRANSPOSE4x4W 0,1,2,3,4
+; the output lists are 32 bytes (16 words) apart
+ movq [r0+r2+ 0], m0
+ movq [r0+r2+32], m1
+ movq [r0+r2+64], m2
+ movq [r0+r2+96], m3
+; keep looping while the decremented offset is still >= 0
+ sub r2d, 8
+ jge .loop
+; NOTE(review): REP_RET is a macro defined elsewhere; presumably chosen over
+; RET because the return directly follows a conditional branch - confirm
+ REP_RET
void x264_zigzag_scan_4x4_frame_mmx ( int16_t level[16], int16_t dct[4][4] );
void x264_zigzag_scan_4x4_field_mmxext( int16_t level[16], int16_t dct[4][4] );
void x264_zigzag_sub_4x4_frame_ssse3 ( int16_t level[16], const uint8_t *src, uint8_t *dst );
+/* Assembly (MMX) implementation of the interleave_8x8_cavlc hook:
+ * dst and src each cover 64 int16_t coefficients. */
+void x264_zigzag_interleave_8x8_cavlc_mmx( int16_t *dst, int16_t *src );
#endif
static inline void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s, int i8start, int i8end )
{
- int i8, i4, i;
+ int i8, i4;
if( h->mb.b_transform_8x8 )
{
/* shuffle 8x8 dct coeffs into 4x4 lists */
for( i8 = i8start; i8 <= i8end; i8++ )
if( h->mb.i_cbp_luma & (1 << i8) )
- for( i4 = 0; i4 < 4; i4++ )
- for( i = 0; i < 16; i++ )
- h->dct.luma4x4[i4+i8*4][i] = h->dct.luma8x8[i8][i4+i*4];
+ h->zigzagf.interleave_8x8_cavlc( h->dct.luma4x4[i8*4], h->dct.luma8x8[i8] );
}
for( i8 = i8start; i8 <= i8end; i8++ )
ok = 1; used_asm = 0;
TEST_ZIGZAG_SCAN( scan_8x8, level1, level2, (void*)dct1, 64 );
TEST_ZIGZAG_SCAN( scan_4x4, level1, level2, dct1[0], 16 );
+ TEST_ZIGZAG_SCAN( interleave_8x8_cavlc, level1, level2, (void*)dct1, 64 );
TEST_ZIGZAG_SUB( sub_4x4, level1, level2, 16 );
report( "zigzag_frame :" );