/*
 * x264_mb_analyse_inter_p4x4_chroma:
 *   Builds motion-compensated predictions into pix1/pix2 via h->mc.mc_chroma
 *   (reading p_fref[4] and p_fref[5]) and returns the sum of two
 *   h->pixf.mbcmp[PIXEL_4x4] costs against h->mb.pic.p_fenc[1] and
 *   h->mb.pic.p_fenc[2].
 *
 *   h      - encoder context; supplies strides, source planes and the
 *            mc / pixf function tables used below.
 *   a      - analysis state; the motion vectors passed to the MC macro are
 *            read from a->l0.
 *   p_fref - reference plane pointers; entries [4] and [5] are used here
 *            (presumably the two chroma planes -- confirm against caller).
 *   i8x8   - partition index; bit 0 and bit 1 select the offsets computed
 *            in `or` / `oe`.
 *   pixel  - partition shape selector (compared against PIXEL_4x4).
 *
 * NOTE(review): this text is in diff form. Lines starting with '-' are the
 * old code and lines starting with '+' are the replacement. The visible
 * change merges the two separate 8x8 scratch buffers (row stride 8) into a
 * single buffer with row stride 16, where pix2 aliases pix1 + 8.
 */
static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a, uint8_t **p_fref, int i8x8, int pixel )
{
/* Old: two independent 8x8 scratch buffers. */
- DECLARE_ALIGNED( uint8_t, pix1[8*8], 8 );
- DECLARE_ALIGNED( uint8_t, pix2[8*8], 8 );
/* New: one 16*8 byte buffer; pix2 starts 8 bytes into pix1, so with a
 * stride of 16 each row holds 8 bytes of pix1 followed by 8 bytes of pix2. */
+ DECLARE_ALIGNED( uint8_t, pix1[16*8], 8 );
+ uint8_t *pix2 = pix1+8;
const int i_stride = h->mb.pic.i_stride[1];
/* Offsets into the reference (or) and source (oe) planes:
 * +4 horizontally when bit 0 of i8x8 is set, and +4 rows when bit 1 is set
 * (2*(i8x8&2) evaluates to 4 in that case). */
const int or = 4*(i8x8&1) + 2*(i8x8&2)*i_stride;
const int oe = 4*(i8x8&1) + 2*(i8x8&2)*FENC_STRIDE;
/* CHROMA4x4MC: run mc_chroma for a width x height sub-block at (x,y) using
 * the motion vector in `me`, once from p_fref[4] into pix1 and once from
 * p_fref[5] into pix2. The old form indexes and strides the destination
 * by 8, the new form by 16 to match the shared buffer layout. */
#define CHROMA4x4MC( width, height, me, x, y ) \
- h->mc.mc_chroma( &p_fref[4][or+x+y*i_stride], i_stride, &pix1[x+y*8], 8, (me).mv[0], (me).mv[1], width, height ); \
- h->mc.mc_chroma( &p_fref[5][or+x+y*i_stride], i_stride, &pix2[x+y*8], 8, (me).mv[0], (me).mv[1], width, height );
+ h->mc.mc_chroma( &p_fref[4][or+x+y*i_stride], i_stride, &pix1[x+y*16], 16, (me).mv[0], (me).mv[1], width, height ); \
+ h->mc.mc_chroma( &p_fref[5][or+x+y*i_stride], i_stride, &pix2[x+y*16], 16, (me).mv[0], (me).mv[1], width, height );
/* NOTE(review): the PIXEL_4x4 branch below uses me4x8 and covers only the
 * right 2x4 half -- this excerpt appears to elide unchanged lines between
 * the `if` and this call; verify against the full source. */
if( pixel == PIXEL_4x4 )
{
CHROMA4x4MC( 2,4, a->l0.me4x8[i8x8][1], 2,0 );
}
/* Total cost = cost(p_fenc[1] vs pix1) + cost(p_fenc[2] vs pix2); the
 * prediction stride passed to mbcmp changes from 8 to 16 with the new
 * buffer layout. */
- return h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[1][oe], FENC_STRIDE, pix1, 8 )
- + h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[2][oe], FENC_STRIDE, pix2, 8 );
+ return h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[1][oe], FENC_STRIDE, pix1, 16 )
+ + h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[2][oe], FENC_STRIDE, pix2, 16 );
}
static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8x8 )
const int i_stride = fenc->i_stride_lowres;
const int i_pel_offset = 8 * ( i_mb_x + i_mb_y * i_stride );
- uint8_t pix1[9*9], pix2[8*8];
+ DECLARE_ALIGNED( uint8_t, pix1[9*FDEC_STRIDE], 8 );
+ uint8_t *pix2 = pix1+8;
x264_me_t m[2];
int i_bcost = COST_MAX;
int i_cost_bak;
}
#define TRY_BIDIR( mv0, mv1, penalty ) \
{ \
- int stride2 = 8; \
+ int stride2 = 16; \
uint8_t *src2; \
int i_cost; \
- h->mc.mc_luma( m[0].p_fref, m[0].i_stride[0], pix1, 8, \
+ h->mc.mc_luma( m[0].p_fref, m[0].i_stride[0], pix1, 16, \
(mv0)[0], (mv0)[1], 8, 8 ); \
src2 = h->mc.get_ref( m[1].p_fref, m[1].i_stride[0], pix2, &stride2, \
(mv1)[0], (mv1)[1], 8, 8 ); \
- h->mc.avg[PIXEL_8x8]( pix1, 8, src2, stride2 ); \
+ h->mc.avg[PIXEL_8x8]( pix1, 16, src2, stride2 ); \
i_cost = penalty + h->pixf.mbcmp[PIXEL_8x8]( \
- m[0].p_fenc[0], FENC_STRIDE, pix1, 8 ); \
+ m[0].p_fenc[0], FENC_STRIDE, pix1, 16 ); \
if( i_bcost > i_cost ) \
{ \
i_bcost = i_cost; \
lowres_intra_mb:
{
- DECLARE_ALIGNED( uint8_t, pix_buf[9*FDEC_STRIDE], 8 );
- uint8_t *pix = &pix_buf[8+FDEC_STRIDE - 1];
+ uint8_t *pix = &pix1[8+FDEC_STRIDE - 1];
uint8_t *src = &fenc->lowres[0][i_pel_offset - 1];
int intra_penalty = 5 + 10 * b_bidir;
int satds[4], i_icost;
if( pixel_asm.name[i] != pixel_ref.name[i] ) \
{ \
used_asm = 1; \
- res_c = pixel_c.name[i]( buf1, 32, buf2, 24 ); \
- res_asm = pixel_asm.name[i]( buf1, 32, buf2, 24 ); \
+ res_c = pixel_c.name[i]( buf1, 32, buf2, 16 ); \
+ res_asm = pixel_asm.name[i]( buf1, 32, buf2, 16 ); \
if( res_c != res_asm ) \
{ \
ok = 0; \
if( pixel_asm.sad_x##N[i] && pixel_asm.sad_x##N[i] != pixel_ref.sad_x##N[i] ) \
{ \
used_asm = 1; \
- res_c[0] = pixel_c.sad[i]( buf1, 16, buf2, 24 ); \
- res_c[1] = pixel_c.sad[i]( buf1, 16, buf2+30, 24 ); \
- res_c[2] = pixel_c.sad[i]( buf1, 16, buf2+1, 24 ); \
+ res_c[0] = pixel_c.sad[i]( buf1, 16, buf2, 32 ); \
+ res_c[1] = pixel_c.sad[i]( buf1, 16, buf2+30, 32 ); \
+ res_c[2] = pixel_c.sad[i]( buf1, 16, buf2+1, 32 ); \
if(N==4) \
{ \
- res_c[3] = pixel_c.sad[i]( buf1, 16, buf2+99, 24 ); \
- pixel_asm.sad_x4[i]( buf1, buf2, buf2+30, buf2+1, buf2+99, 24, res_asm ); \
+ res_c[3] = pixel_c.sad[i]( buf1, 16, buf2+99, 32 ); \
+ pixel_asm.sad_x4[i]( buf1, buf2, buf2+30, buf2+1, buf2+99, 32, res_asm ); \
} \
else \
- pixel_asm.sad_x3[i]( buf1, buf2, buf2+30, buf2+1, 24, res_asm ); \
+ pixel_asm.sad_x3[i]( buf1, buf2, buf2+30, buf2+1, 32, res_asm ); \
if( memcmp(res_c, res_asm, sizeof(res_c)) ) \
{ \
ok = 0; \
if( mc_a.name[i] != mc_ref.name[i] ) \
{ \
used_asm = 1; \
- mc_c.name[i]( buf3, 32, buf2, 24, ##__VA_ARGS__ ); \
- mc_a.name[i]( buf4, 32, buf2, 24, ##__VA_ARGS__ ); \
+ mc_c.name[i]( buf3, 32, buf2, 16, ##__VA_ARGS__ ); \
+ mc_a.name[i]( buf4, 32, buf2, 16, ##__VA_ARGS__ ); \
if( memcmp( buf3, buf4, 1024 ) ) \
{ \
ok = 0; \