From: Håkan Hjort Date: Mon, 17 Mar 2008 07:20:02 +0000 (-0600) Subject: increase alignment of mv arrays X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e56ea0861b650e1ee5f3951d786bfc5297183574;p=libx264 increase alignment of mv arrays --- diff --git a/common/common.h b/common/common.h index d1c5e776..c4925f21 100644 --- a/common/common.h +++ b/common/common.h @@ -460,16 +460,16 @@ struct x264_t int non_zero_count[X264_SCAN8_SIZE]; /* -1 if unused, -2 if unavailable */ - int8_t ref[2][X264_SCAN8_SIZE]; + DECLARE_ALIGNED( int8_t, ref[2][X264_SCAN8_SIZE], 4 ); /* 0 if not available */ - int16_t mv[2][X264_SCAN8_SIZE][2]; - int16_t mvd[2][X264_SCAN8_SIZE][2]; + DECLARE_ALIGNED( int16_t, mv[2][X264_SCAN8_SIZE][2], 16 ); + DECLARE_ALIGNED( int16_t, mvd[2][X264_SCAN8_SIZE][2], 4 ); /* 1 if SKIP or DIRECT. set only for B-frames + CABAC */ - int8_t skip[X264_SCAN8_SIZE]; + DECLARE_ALIGNED( int8_t, skip[X264_SCAN8_SIZE], 4 ); - int16_t direct_mv[2][X264_SCAN8_SIZE][2]; + DECLARE_ALIGNED( int16_t, direct_mv[2][X264_SCAN8_SIZE][2], 16 ) ; int8_t direct_ref[2][X264_SCAN8_SIZE]; int pskip_mv[2]; diff --git a/encoder/analyse.c b/encoder/analyse.c index 2b3ec7e2..cdfd08e9 100644 --- a/encoder/analyse.c +++ b/encoder/analyse.c @@ -44,8 +44,8 @@ typedef struct /* 8x8 */ int i_cost8x8; - int mvc[32][5][2]; /* [ref][0] is 16x16 mv, - [ref][1..4] are 8x8 mv from partition [0..3] */ + /* [ref][0] is 16x16 mv, [ref][1..4] are 8x8 mv from partition [0..3] */ + DECLARE_ALIGNED( int, mvc[32][5][2], 8 ); x264_me_t me8x8[4]; /* Sub 4x4 */ @@ -1145,7 +1145,7 @@ static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a ) { x264_me_t m; uint8_t **p_fenc = h->mb.pic.p_fenc; - int mvc[3][2]; + DECLARE_ALIGNED( int, mvc[3][2], 8 ); int i, j; /* XXX Needed for x264_mb_predict_mv */ @@ -1195,7 +1195,7 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a ) { x264_me_t m; uint8_t **p_fenc = h->mb.pic.p_fenc; - int mvc[3][2]; + 
DECLARE_ALIGNED( int, mvc[3][2], 8 ); int i, j; /* XXX Needed for x264_mb_predict_mv */ @@ -1698,7 +1698,7 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a ) { h->mb.pic.p_fref[0][a->l0.i_ref], h->mb.pic.p_fref[1][a->l1.i_ref] }; DECLARE_ALIGNED( uint8_t, pix[2][16*8], 16 ); - int mvc[2][2]; + DECLARE_ALIGNED( int, mvc[2][2], 8 ); int i, l; h->mb.i_partition = D_16x8; @@ -1721,10 +1721,8 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a ) LOAD_FENC( m, h->mb.pic.p_fenc, 0, 8*i ); LOAD_HPELS( m, p_fref[l], l, lX->i_ref, 0, 8*i ); - mvc[0][0] = lX->me8x8[2*i].mv[0]; - mvc[0][1] = lX->me8x8[2*i].mv[1]; - mvc[1][0] = lX->me8x8[2*i+1].mv[0]; - mvc[1][1] = lX->me8x8[2*i+1].mv[1]; + *(uint64_t*)mvc[0] = *(uint64_t*)lX->me8x8[2*i].mv; + *(uint64_t*)mvc[1] = *(uint64_t*)lX->me8x8[2*i+1].mv; x264_mb_predict_mv( h, l, 8*i, 2, m->mvp ); x264_me_search( h, m, mvc, 2 ); @@ -1769,7 +1767,7 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a ) { h->mb.pic.p_fref[0][a->l0.i_ref], h->mb.pic.p_fref[1][a->l1.i_ref] }; uint8_t pix[2][8*16]; - int mvc[2][2]; + DECLARE_ALIGNED( int, mvc[2][2], 8 ); int i, l; h->mb.i_partition = D_8x16; @@ -1791,10 +1789,8 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a ) LOAD_FENC( m, h->mb.pic.p_fenc, 8*i, 0 ); LOAD_HPELS( m, p_fref[l], l, lX->i_ref, 8*i, 0 ); - mvc[0][0] = lX->me8x8[i].mv[0]; - mvc[0][1] = lX->me8x8[i].mv[1]; - mvc[1][0] = lX->me8x8[i+2].mv[0]; - mvc[1][1] = lX->me8x8[i+2].mv[1]; + *(uint64_t*)mvc[0] = *(uint64_t*)lX->me8x8[i].mv; + *(uint64_t*)mvc[1] = *(uint64_t*)lX->me8x8[i+2].mv; x264_mb_predict_mv( h, l, 4*i, 2, m->mvp ); x264_me_search( h, m, mvc, 2 ); diff --git a/encoder/me.h b/encoder/me.h index e0be0e9a..a1cc0afb 100644 --- a/encoder/me.h +++ b/encoder/me.h @@ -44,7 +44,7 @@ typedef struct /* output */ int cost_mv; /* lambda * nbits for the chosen mv */ int cost; /* satd + lambda * nbits */ - int mv[2]; + 
DECLARE_ALIGNED( int, mv[2], 8 ); } x264_me_t; void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_fullpel_thresh );