}
}
-void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int mvp[2] )
+void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int16_t mvp[2] )
{
const int i8 = x264_scan8[idx];
const int i_ref= h->mb.cache.ref[i_list][i8];
{
if( idx == 0 && i_refb == i_ref )
{
- mvp[0] = mv_b[0];
- mvp[1] = mv_b[1];
+ *(uint32_t*)mvp = *(uint32_t*)mv_b;
return;
}
else if( idx != 0 && i_refa == i_ref )
{
- mvp[0] = mv_a[0];
- mvp[1] = mv_a[1];
+ *(uint32_t*)mvp = *(uint32_t*)mv_a;
return;
}
}
{
if( idx == 0 && i_refa == i_ref )
{
- mvp[0] = mv_a[0];
- mvp[1] = mv_a[1];
+ *(uint32_t*)mvp = *(uint32_t*)mv_a;
return;
}
else if( idx != 0 && i_refc == i_ref )
{
- mvp[0] = mv_c[0];
- mvp[1] = mv_c[1];
+ *(uint32_t*)mvp = *(uint32_t*)mv_c;
return;
}
}
else if( i_count == 1 )
{
if( i_refa == i_ref )
- {
- mvp[0] = mv_a[0];
- mvp[1] = mv_a[1];
- }
+ *(uint32_t*)mvp = *(uint32_t*)mv_a;
else if( i_refb == i_ref )
- {
- mvp[0] = mv_b[0];
- mvp[1] = mv_b[1];
- }
+ *(uint32_t*)mvp = *(uint32_t*)mv_b;
else
- {
- mvp[0] = mv_c[0];
- mvp[1] = mv_c[1];
- }
+ *(uint32_t*)mvp = *(uint32_t*)mv_c;
}
else if( i_refb == -2 && i_refc == -2 && i_refa != -2 )
- {
- mvp[0] = mv_a[0];
- mvp[1] = mv_a[1];
- }
+ *(uint32_t*)mvp = *(uint32_t*)mv_a;
else
{
mvp[0] = x264_median( mv_a[0], mv_b[0], mv_c[0] );
mvp[1] = x264_median( mv_a[1], mv_b[1], mv_c[1] );
}
}
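
The core idiom of this patch: motion vectors shrink from int to int16_t pairs, so a whole (x,y) vector can be loaded, stored, or compared as a single uint32_t, and two adjacent vectors as a uint64_t. A minimal sketch of the copy idiom (mv_copy is a hypothetical name, not an x264 function), assuming the 4-byte alignment that DECLARE_ALIGNED_4 guarantees and a build that tolerates the type-punned access (e.g. compiled with -fno-strict-aliasing):

    /* One 32-bit store replaces two 16-bit component stores. */
    static inline void mv_copy( int16_t dst[2], const int16_t src[2] )
    {
        *(uint32_t*)dst = *(const uint32_t*)src;
    }

Accordingly, arrays touched only as uint32_t need just DECLARE_ALIGNED_4, while those still copied as uint64_t keep 8-byte alignment.
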
-void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int mvp[2] )
+void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int16_t mvp[2] )
{
int i_refa = h->mb.cache.ref[i_list][X264_SCAN8_0 - 1];
int16_t *mv_a = h->mb.cache.mv[i_list][X264_SCAN8_0 - 1];
else if( i_count == 1 )
{
if( i_refa == i_ref )
- {
- mvp[0] = mv_a[0];
- mvp[1] = mv_a[1];
- }
+ *(uint32_t*)mvp = *(uint32_t*)mv_a;
else if( i_refb == i_ref )
- {
- mvp[0] = mv_b[0];
- mvp[1] = mv_b[1];
- }
+ *(uint32_t*)mvp = *(uint32_t*)mv_b;
else
- {
- mvp[0] = mv_c[0];
- mvp[1] = mv_c[1];
- }
+ *(uint32_t*)mvp = *(uint32_t*)mv_c;
}
else if( i_refb == -2 && i_refc == -2 && i_refa != -2 )
- {
- mvp[0] = mv_a[0];
- mvp[1] = mv_a[1];
- }
+ *(uint32_t*)mvp = *(uint32_t*)mv_a;
else
{
mvp[0] = x264_median( mv_a[0], mv_b[0], mv_c[0] );
mvp[1] = x264_median( mv_a[1], mv_b[1], mv_c[1] );
}
-void x264_mb_predict_mv_pskip( x264_t *h, int mv[2] )
+void x264_mb_predict_mv_pskip( x264_t *h, int16_t mv[2] )
{
int i_refa = h->mb.cache.ref[0][X264_SCAN8_0 - 1];
int i_refb = h->mb.cache.ref[0][X264_SCAN8_0 - 8];
int16_t *mv_b = h->mb.cache.mv[0][X264_SCAN8_0 - 8];
if( i_refa == -2 || i_refb == -2 ||
- ( i_refa == 0 && mv_a[0] == 0 && mv_a[1] == 0 ) ||
- ( i_refb == 0 && mv_b[0] == 0 && mv_b[1] == 0 ) )
+ ( i_refa == 0 && *(uint32_t*)mv_a == 0 ) ||
+ ( i_refb == 0 && *(uint32_t*)mv_b == 0 ) )
{
- mv[0] = mv[1] = 0;
+ *(uint32_t*)mv = 0;
}
else
{
if( IS_INTRA( type_col ) )
{
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
- x264_macroblock_cache_mv( h, 0, 0, 4, 4, 0, 0, 0 );
- x264_macroblock_cache_mv( h, 0, 0, 4, 4, 1, 0, 0 );
+ x264_macroblock_cache_mv( h, 0, 0, 4, 4, 0, 0 );
+ x264_macroblock_cache_mv( h, 0, 0, 4, 4, 1, 0 );
return 1;
}
b8x8 = h->sps->b_direct8x8_inference ||
if( b8x8 )
{
const int16_t *mv_col = h->fref1[0]->mv[0][ i_mb_4x4 + 3*x8 + 3*y8 * h->mb.i_b4_stride];
- int mv_l0[2];
- mv_l0[0] = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
- mv_l0[1] = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
- x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, mv_l0[0], mv_l0[1] );
- x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, mv_l0[0] - mv_col[0], mv_l0[1] - mv_col[1] );
+ const int l0x = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
+ const int l0y = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
+ x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, pack16to32_mask(l0x, l0y) );
+ x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_col[1]) );
}
else
{
const int x4 = i4%2 + 2*x8;
const int y4 = i4/2 + 2*y8;
const int16_t *mv_col = h->fref1[0]->mv[0][ i_mb_4x4 + x4 + y4 * h->mb.i_b4_stride ];
- int mv_l0[2];
- mv_l0[0] = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
- mv_l0[1] = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
- x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, mv_l0[0], mv_l0[1] );
- x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, mv_l0[0] - mv_col[0], mv_l0[1] - mv_col[1] );
+ const int l0x = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
+ const int l0y = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
+ x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, pack16to32_mask(l0x, l0y) );
+ x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_col[1]) );
}
}
}
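
In the temporal-direct hunks above, the L0 vector is the co-located vector scaled by dist_scale_factor, which is in units of FIXED_SCALE (256, defined below); the "+ 128 >> 8" rounds to nearest. For example, a B-frame midway between its two references has dist_scale_factor 128, so l0x/l0y come out as half of mv_col. pack16to32_mask then combines the two components into the packed uint32_t that x264_macroblock_cache_mv now takes; a sketch of that helper, assuming x264's usual definition:

    static inline uint32_t pack16to32_mask( int x, int y )
    {
        /* low 16 bits = x (masked so x's sign bits don't spill into y),
           high 16 bits = y */
        return (x & 0xffff) | (y << 16);
    }
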
static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
{
int ref[2];
- int mv[2][2];
+ DECLARE_ALIGNED_8( int16_t mv[2][2] );
int i_list;
int i8, i4;
int b8x8;
{
ref[0] =
ref[1] = 0;
- mv[0][0] =
- mv[0][1] =
- mv[1][0] =
- mv[1][1] = 0;
+ *(uint64_t*)mv[0] = 0;
}
else
{
if( ref[i_list] >= 0 )
x264_mb_predict_mv_16x16( h, i_list, ref[i_list], mv[i_list] );
else
- mv[i_list][0] = mv[i_list][1] = 0;
+ *(uint32_t*)mv[i_list] = 0;
}
}
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, ref[0] );
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, ref[1] );
- x264_macroblock_cache_mv( h, 0, 0, 4, 4, 0, mv[0][0], mv[0][1] );
- x264_macroblock_cache_mv( h, 0, 0, 4, 4, 1, mv[1][0], mv[1][1] );
+ x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, mv[0] );
+ x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 1, mv[1] );
if( IS_INTRA( type_col ) )
return 1;
if( abs( mvcol[0] ) <= 1 && abs( mvcol[1] ) <= 1 )
{
if( ref[0] == 0 )
- x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, 0, 0 );
+ x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, 0 );
if( ref[1] == 0 )
- x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, 0, 0 );
+ x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, 0 );
}
}
else
if( abs( mvcol[0] ) <= 1 && abs( mvcol[1] ) <= 1 )
{
if( ref[0] == 0 )
- x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, 0, 0 );
+ x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, 0 );
if( ref[1] == 0 )
- x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, 0, 0 );
+ x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, 0 );
}
}
}
#define FIXED_SCALE 256
/* This just improves encoder performance, it's not part of the spec */
-void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[8][2], int *i_mvc )
+void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[8][2], int *i_mvc )
{
int16_t (*mvr)[2] = h->mb.mvr[i_list][i_ref];
int i = 0;
#define SET_MVP(mvp) { \
- mvc[i][0] = mvp[0]; \
- mvc[i][1] = mvp[1]; \
+ *(uint32_t*)mvc[i] = *(uint32_t*)mvp; \
i++; \
}
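
With packed vectors, SET_MVP appends a candidate to the mvc list with a single word copy; since the macro is expanded once per candidate predictor gathered here, the savings add up.
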
const int ir = i_top_8x8 - 1;
const int iv = i_top_4x4 - 1;
h->mb.cache.ref[i_list][i8] = h->mb.ref[i_list][ir];
- h->mb.cache.mv[i_list][i8][0] = h->mb.mv[i_list][iv][0];
- h->mb.cache.mv[i_list][i8][1] = h->mb.mv[i_list][iv][1];
+ *(uint32_t*)h->mb.cache.mv[i_list][i8] = *(uint32_t*)h->mb.mv[i_list][iv];
}
else
{
const int i8 = x264_scan8[0] - 1 - 1*8;
h->mb.cache.ref[i_list][i8] = -2;
- h->mb.cache.mv[i_list][i8][0] = 0;
- h->mb.cache.mv[i_list][i8][1] = 0;
+ *(uint32_t*)h->mb.cache.mv[i_list][i8] = 0;
}
if( h->mb.i_neighbour & MB_TOP )
h->mb.cache.ref[i_list][i8+1] = h->mb.ref[i_list][ir + 0];
h->mb.cache.ref[i_list][i8+2] =
h->mb.cache.ref[i_list][i8+3] = h->mb.ref[i_list][ir + 1];
-
- for( i = 0; i < 4; i++ )
- {
- h->mb.cache.mv[i_list][i8+i][0] = h->mb.mv[i_list][iv + i][0];
- h->mb.cache.mv[i_list][i8+i][1] = h->mb.mv[i_list][iv + i][1];
- }
+ *(uint64_t*)h->mb.cache.mv[i_list][i8+0] = *(uint64_t*)h->mb.mv[i_list][iv+0];
+ *(uint64_t*)h->mb.cache.mv[i_list][i8+2] = *(uint64_t*)h->mb.mv[i_list][iv+2];
}
else
{
const int i8 = x264_scan8[0] - 8;
- for( i = 0; i < 4; i++ )
- {
- h->mb.cache.ref[i_list][i8+i] = -2;
- h->mb.cache.mv[i_list][i8+i][0] =
- h->mb.cache.mv[i_list][i8+i][1] = 0;
- }
+ *(uint64_t*)h->mb.cache.mv[i_list][i8+0] = 0;
+ *(uint64_t*)h->mb.cache.mv[i_list][i8+2] = 0;
+ *(uint32_t*)&h->mb.cache.ref[i_list][i8] = (uint8_t)(-2) * 0x01010101U;
}
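
The four int8_t reference indices are set in one store by broadcasting a byte through multiplication: (uint8_t)(-2) is 0xFE, and 0xFE * 0x01010101 = 0xFEFEFEFE, four copies of -2 in a single uint32_t. The 16-bit variant later in the patch, (uint8_t)(-1) * 0x0101 = 0xFFFF, broadcasts -1 into a pair of refs the same way.
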
if( h->mb.i_neighbour & MB_TOPRIGHT )
const int ir = i_top_8x8 + 2;
const int iv = i_top_4x4 + 4;
h->mb.cache.ref[i_list][i8] = h->mb.ref[i_list][ir];
- h->mb.cache.mv[i_list][i8][0] = h->mb.mv[i_list][iv][0];
- h->mb.cache.mv[i_list][i8][1] = h->mb.mv[i_list][iv][1];
+ *(uint32_t*)h->mb.cache.mv[i_list][i8] = *(uint32_t*)h->mb.mv[i_list][iv];
}
else
{
const int i8 = x264_scan8[0] + 4 - 1*8;
h->mb.cache.ref[i_list][i8] = -2;
- h->mb.cache.mv[i_list][i8][0] = 0;
- h->mb.cache.mv[i_list][i8][1] = 0;
+ *(uint32_t*)h->mb.cache.mv[i_list][i8] = 0;
}
if( h->mb.i_neighbour & MB_LEFT )
h->mb.cache.ref[i_list][i8+3*8] = h->mb.ref[i_list][ir + 1*s8x8];
for( i = 0; i < 4; i++ )
- {
- h->mb.cache.mv[i_list][i8+i*8][0] = h->mb.mv[i_list][iv + i*s4x4][0];
- h->mb.cache.mv[i_list][i8+i*8][1] = h->mb.mv[i_list][iv + i*s4x4][1];
- }
+ *(uint32_t*)h->mb.cache.mv[i_list][i8+i*8] = *(uint32_t*)h->mb.mv[i_list][iv + i*s4x4];
}
else
{
for( i = 0; i < 4; i++ )
{
h->mb.cache.ref[i_list][i8+i*8] = -2;
- h->mb.cache.mv[i_list][i8+i*8][0] =
- h->mb.cache.mv[i_list][i8+i*8][1] = 0;
+ *(uint32_t*)h->mb.cache.mv[i_list][i8+i*8] = 0;
}
}
{
const int i8 = x264_scan8[0] - 8;
const int iv = i_top_4x4;
- for( i = 0; i < 4; i++ )
- {
- h->mb.cache.mvd[i_list][i8+i][0] = h->mb.mvd[i_list][iv + i][0];
- h->mb.cache.mvd[i_list][i8+i][1] = h->mb.mvd[i_list][iv + i][1];
- }
+ *(uint64_t*)h->mb.cache.mvd[i_list][i8+0] = *(uint64_t*)h->mb.mvd[i_list][iv+0];
+ *(uint64_t*)h->mb.cache.mvd[i_list][i8+2] = *(uint64_t*)h->mb.mvd[i_list][iv+2];
}
else
{
const int i8 = x264_scan8[0] - 8;
- for( i = 0; i < 4; i++ )
- {
- h->mb.cache.mvd[i_list][i8+i][0] =
- h->mb.cache.mvd[i_list][i8+i][1] = 0;
- }
+ *(uint64_t*)h->mb.cache.mvd[i_list][i8+0] =
+ *(uint64_t*)h->mb.cache.mvd[i_list][i8+2] = 0;
}
if( i_left_type >= 0 )
const int i8 = x264_scan8[0] - 1;
const int iv = i_mb_4x4 - 1;
for( i = 0; i < 4; i++ )
- {
- h->mb.cache.mvd[i_list][i8+i*8][0] = h->mb.mvd[i_list][iv + i*s4x4][0];
- h->mb.cache.mvd[i_list][i8+i*8][1] = h->mb.mvd[i_list][iv + i*s4x4][1];
- }
+ *(uint32_t*)h->mb.cache.mvd[i_list][i8+i*8] = *(uint32_t*)h->mb.mvd[i_list][iv + i*s4x4];
}
else
{
const int i8 = x264_scan8[0] - 1;
for( i = 0; i < 4; i++ )
- {
- h->mb.cache.mvd[i_list][i8+i*8][0] =
- h->mb.cache.mvd[i_list][i8+i*8][1] = 0;
- }
+ *(uint32_t*)h->mb.cache.mvd[i_list][i8+i*8] = 0;
}
}
}
int i_list;
for( i_list = 0; i_list < (h->sh.i_type == SLICE_TYPE_B ? 2 : 1 ); i_list++ )
{
- int y,x;
+ int y;
h->mb.ref[i_list][i_mb_8x8+0+0*s8x8] = h->mb.cache.ref[i_list][x264_scan8[0]];
h->mb.ref[i_list][i_mb_8x8+1+0*s8x8] = h->mb.cache.ref[i_list][x264_scan8[4]];
for( y = 0; y < 4; y++ )
{
- for( x = 0; x < 4; x++ )
- {
- h->mb.mv[i_list][i_mb_4x4+x+y*s4x4][0] = h->mb.cache.mv[i_list][x264_scan8[0]+x+8*y][0];
- h->mb.mv[i_list][i_mb_4x4+x+y*s4x4][1] = h->mb.cache.mv[i_list][x264_scan8[0]+x+8*y][1];
- }
+ *(uint64_t*)h->mb.mv[i_list][i_mb_4x4+y*s4x4+0] = *(uint64_t*)h->mb.cache.mv[i_list][x264_scan8[0]+8*y+0];
+ *(uint64_t*)h->mb.mv[i_list][i_mb_4x4+y*s4x4+2] = *(uint64_t*)h->mb.cache.mv[i_list][x264_scan8[0]+8*y+2];
}
}
}
int i_list;
for( i_list = 0; i_list < (h->sh.i_type == SLICE_TYPE_B ? 2 : 1 ); i_list++ )
{
- int y,x;
+ int y;
- h->mb.ref[i_list][i_mb_8x8+0+0*s8x8] =
- h->mb.ref[i_list][i_mb_8x8+1+0*s8x8] =
- h->mb.ref[i_list][i_mb_8x8+0+1*s8x8] =
- h->mb.ref[i_list][i_mb_8x8+1+1*s8x8] = -1;
+ *(uint16_t*)&h->mb.ref[i_list][i_mb_8x8+0*s8x8] = (uint8_t)(-1) * 0x0101;
+ *(uint16_t*)&h->mb.ref[i_list][i_mb_8x8+1*s8x8] = (uint8_t)(-1) * 0x0101;
for( y = 0; y < 4; y++ )
{
- for( x = 0; x < 4; x++ )
- {
- h->mb.mv[i_list][i_mb_4x4+x+y*s4x4][0] = 0;
- h->mb.mv[i_list][i_mb_4x4+x+y*s4x4][1] = 0;
- }
+ *(uint64_t*)h->mb.mv[i_list][i_mb_4x4+y*s4x4+0] = 0;
+ *(uint64_t*)h->mb.mv[i_list][i_mb_4x4+y*s4x4+2] = 0;
}
}
}
for( i_list = 0; i_list < 2; i_list++ )
{
const int s4x4 = 4 * h->mb.i_mb_stride;
- int y,x;
+ int y;
for( y = 0; y < 4; y++ )
{
- for( x = 0; x < 4; x++ )
- {
- h->mb.mvd[i_list][i_mb_4x4+x+y*s4x4][0] = h->mb.cache.mvd[i_list][x264_scan8[0]+x+8*y][0];
- h->mb.mvd[i_list][i_mb_4x4+x+y*s4x4][1] = h->mb.cache.mvd[i_list][x264_scan8[0]+x+8*y][1];
- }
+ *(uint64_t*)h->mb.mvd[i_list][i_mb_4x4+y*s4x4+0] = *(uint64_t*)h->mb.cache.mvd[i_list][x264_scan8[0]+8*y+0];
+ *(uint64_t*)h->mb.mvd[i_list][i_mb_4x4+y*s4x4+2] = *(uint64_t*)h->mb.cache.mvd[i_list][x264_scan8[0]+8*y+2];
}
}
}
for( i_list = 0; i_list < 2; i_list++ )
{
const int s4x4 = 4 * h->mb.i_mb_stride;
- int y,x;
+ int y;
for( y = 0; y < 4; y++ )
{
- for( x = 0; x < 4; x++ )
- {
- h->mb.mvd[i_list][i_mb_4x4+x+y*s4x4][0] = 0;
- h->mb.mvd[i_list][i_mb_4x4+x+y*s4x4][1] = 0;
- }
+ *(uint64_t*)h->mb.mvd[i_list][i_mb_4x4+y*s4x4+0] = 0;
+ *(uint64_t*)h->mb.mvd[i_list][i_mb_4x4+y*s4x4+2] = 0;
}
}
}
/* 8x8 */
int i_cost8x8;
/* [ref][0] is 16x16 mv, [ref][1..4] are 8x8 mv from partition [0..3] */
- DECLARE_ALIGNED_8( int mvc[32][5][2] );
+ DECLARE_ALIGNED_4( int16_t mvc[32][5][2] );
x264_me_t me8x8[4];
/* Sub 4x4 */
static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
{
x264_me_t m;
- int i_ref;
- int mvc[7][2], i_mvc;
+ int i_ref, i_mvc;
+ DECLARE_ALIGNED_4( int16_t mvc[7][2] );
int i_halfpel_thresh = INT_MAX;
int *p_halfpel_thresh = h->mb.pic.i_fref[0]>1 ? &i_halfpel_thresh : NULL;
a->l0.me16x16 = m;
/* save mv for predicting neighbors */
- a->l0.mvc[i_ref][0][0] =
- h->mb.mvr[0][i_ref][h->mb.i_mb_xy][0] = m.mv[0];
- a->l0.mvc[i_ref][0][1] =
- h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1] = m.mv[1];
+ *(uint32_t*)a->l0.mvc[i_ref][0] =
+ *(uint32_t*)h->mb.mvr[0][i_ref][h->mb.i_mb_xy] = *(uint32_t*)m.mv;
}
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.me16x16.i_ref );
h->mb.i_type = P_L0;
if( a->b_mbrd && a->l0.me16x16.i_ref == 0
- && a->l0.me16x16.mv[0] == h->mb.cache.pskip_mv[0]
- && a->l0.me16x16.mv[1] == h->mb.cache.pskip_mv[1] )
+ && *(uint32_t*)a->l0.me16x16.mv == *(uint32_t*)h->mb.cache.pskip_mv )
{
h->mb.i_partition = D_16x16;
- x264_macroblock_cache_mv( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );
+ x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );
a->l0.i_rd16x16 = x264_rd_cost_mb( h, a->i_lambda2 );
}
}
}
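
Packing also collapses comparisons: the P-skip check above matches the 16x16 MV against h->mb.cache.pskip_mv with one 32-bit compare where the old code needed two component compares.
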
for( i_ref = 0; i_ref <= i_maxref; i_ref++ )
- {
- a->l0.mvc[i_ref][0][0] = h->mb.mvr[0][i_ref][h->mb.i_mb_xy][0];
- a->l0.mvc[i_ref][0][1] = h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1];
- }
+ *(uint32_t*)a->l0.mvc[i_ref][0] = *(uint32_t*)h->mb.mvr[0][i_ref][h->mb.i_mb_xy];
for( i = 0; i < 4; i++ )
{
m.cost += i_ref_cost;
i_halfpel_thresh += i_ref_cost;
- *(uint64_t*)a->l0.mvc[i_ref][i+1] = *(uint64_t*)m.mv;
+ *(uint32_t*)a->l0.mvc[i_ref][i+1] = *(uint32_t*)m.mv;
if( m.cost < l0m->cost )
*l0m = m;
}
- x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, l0m->mv[0], l0m->mv[1] );
+ x264_macroblock_cache_mv_ptr( h, 2*x8, 2*y8, 2, 2, 0, l0m->mv );
x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 0, l0m->i_ref );
/* mb type cost */
uint8_t **p_fref = h->mb.pic.p_fref[0][i_ref];
uint8_t **p_fenc = h->mb.pic.p_fenc;
int i_mvc;
- int (*mvc)[2] = a->l0.mvc[i_ref];
+ int16_t (*mvc)[2] = a->l0.mvc[i_ref];
int i;
/* XXX Needed for x264_mb_predict_mv */
h->mb.i_partition = D_8x8;
i_mvc = 1;
- *(uint64_t*)mvc[0] = *(uint64_t*)a->l0.me16x16.mv;
+ *(uint32_t*)mvc[0] = *(uint32_t*)a->l0.me16x16.mv;
for( i = 0; i < 4; i++ )
{
x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
x264_me_search( h, m, mvc, i_mvc );
- x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, m->mv[0], m->mv[1] );
+ x264_macroblock_cache_mv_ptr( h, 2*x8, 2*y8, 2, 2, 0, m->mv );
- *(uint64_t*)mvc[i_mvc] = *(uint64_t*)m->mv;
+ *(uint32_t*)mvc[i_mvc] = *(uint32_t*)m->mv;
i_mvc++;
/* mb type cost */
{
x264_me_t m;
uint8_t **p_fenc = h->mb.pic.p_fenc;
- DECLARE_ALIGNED_8( int mvc[3][2] );
+ DECLARE_ALIGNED_4( int16_t mvc[3][2] );
int i, j;
/* XXX Needed for x264_mb_predict_mv */
m.i_ref = i_ref;
/* if we skipped the 16x16 predictor, we wouldn't have to copy anything... */
- *(uint64_t*)mvc[0] = *(uint64_t*)a->l0.mvc[i_ref][0];
- *(uint64_t*)mvc[1] = *(uint64_t*)a->l0.mvc[i_ref][2*i+1];
- *(uint64_t*)mvc[2] = *(uint64_t*)a->l0.mvc[i_ref][2*i+2];
+ *(uint32_t*)mvc[0] = *(uint32_t*)a->l0.mvc[i_ref][0];
+ *(uint32_t*)mvc[1] = *(uint32_t*)a->l0.mvc[i_ref][2*i+1];
+ *(uint32_t*)mvc[2] = *(uint32_t*)a->l0.mvc[i_ref][2*i+2];
LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, i_ref, 0, 8*i );
x264_macroblock_cache_ref( h, 0, 2*i, 4, 2, 0, i_ref );
if( m.cost < l0m->cost )
*l0m = m;
}
- x264_macroblock_cache_mv( h, 0, 2*i, 4, 2, 0, l0m->mv[0], l0m->mv[1] );
+ x264_macroblock_cache_mv_ptr( h, 0, 2*i, 4, 2, 0, l0m->mv );
x264_macroblock_cache_ref( h, 0, 2*i, 4, 2, 0, l0m->i_ref );
}
{
x264_me_t m;
uint8_t **p_fenc = h->mb.pic.p_fenc;
- DECLARE_ALIGNED_8( int mvc[3][2] );
+ DECLARE_ALIGNED_4( int16_t mvc[3][2] );
int i, j;
/* XXX Needed for x264_mb_predict_mv */
m.i_ref_cost = i_ref_cost;
m.i_ref = i_ref;
- *(uint64_t*)mvc[0] = *(uint64_t*)a->l0.mvc[i_ref][0];
- *(uint64_t*)mvc[1] = *(uint64_t*)a->l0.mvc[i_ref][i+1];
- *(uint64_t*)mvc[2] = *(uint64_t*)a->l0.mvc[i_ref][i+3];
+ *(uint32_t*)mvc[0] = *(uint32_t*)a->l0.mvc[i_ref][0];
+ *(uint32_t*)mvc[1] = *(uint32_t*)a->l0.mvc[i_ref][i+1];
+ *(uint32_t*)mvc[2] = *(uint32_t*)a->l0.mvc[i_ref][i+3];
LOAD_HPELS( &m, h->mb.pic.p_fref[0][i_ref], 0, i_ref, 8*i, 0 );
x264_macroblock_cache_ref( h, 2*i, 0, 2, 4, 0, i_ref );
if( m.cost < l0m->cost )
*l0m = m;
}
- x264_macroblock_cache_mv( h, 2*i, 0, 2, 4, 0, l0m->mv[0], l0m->mv[1] );
+ x264_macroblock_cache_mv_ptr( h, 2*i, 0, 2, 4, 0, l0m->mv );
x264_macroblock_cache_ref( h, 2*i, 0, 2, 4, 0, l0m->i_ref );
}
x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
x264_me_search( h, m, &a->l0.me8x8[i8x8].mv, i_mvc );
- x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, m->mv[0], m->mv[1] );
+ x264_macroblock_cache_mv_ptr( h, x4, y4, 1, 1, 0, m->mv );
}
a->l0.i_cost4x4[i8x8] = a->l0.me4x4[i8x8][0].cost +
a->l0.me4x4[i8x8][1].cost +
x264_mb_predict_mv( h, 0, idx, 2, m->mvp );
x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
- x264_macroblock_cache_mv( h, x4, y4, 2, 1, 0, m->mv[0], m->mv[1] );
+ x264_macroblock_cache_mv_ptr( h, x4, y4, 2, 1, 0, m->mv );
}
a->l0.i_cost8x4[i8x8] = a->l0.me8x4[i8x8][0].cost + a->l0.me8x4[i8x8][1].cost +
REF_COST( 0, i_ref ) +
x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
- x264_macroblock_cache_mv( h, x4, y4, 1, 2, 0, m->mv[0], m->mv[1] );
+ x264_macroblock_cache_mv_ptr( h, x4, y4, 1, 2, 0, m->mv );
}
a->l0.i_cost4x8[i8x8] = a->l0.me4x8[i8x8][0].cost + a->l0.me4x8[i8x8][1].cost +
REF_COST( 0, i_ref ) +
int weight;
x264_me_t m;
- int i_ref;
- int mvc[8][2], i_mvc;
+ int i_ref, i_mvc;
+ DECLARE_ALIGNED_4( int16_t mvc[8][2] );
int i_halfpel_thresh = INT_MAX;
int *p_halfpel_thresh = h->mb.pic.i_fref[0]>1 ? &i_halfpel_thresh : NULL;
}
/* save mv for predicting neighbors */
- h->mb.mvr[0][i_ref][h->mb.i_mb_xy][0] = m.mv[0];
- h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1] = m.mv[1];
+ *(uint32_t*)h->mb.mvr[0][i_ref][h->mb.i_mb_xy] = *(uint32_t*)m.mv;
}
/* subtract ref cost, so we don't have to add it for the other MB types */
a->l0.me16x16.cost -= REF_COST( 0, a->l0.i_ref );
}
/* save mv for predicting neighbors */
- h->mb.mvr[1][i_ref][h->mb.i_mb_xy][0] = m.mv[0];
- h->mb.mvr[1][i_ref][h->mb.i_mb_xy][1] = m.mv[1];
+ *(uint32_t*)h->mb.mvr[1][i_ref][h->mb.i_mb_xy] = *(uint32_t*)m.mv;
}
/* subtract ref cost, so we don't have to add it for the other MB types */
a->l1.me16x16.cost -= REF_COST( 1, a->l1.i_ref );
/* get cost of BI mode */
weight = h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref];
- if ( ((a->l0.me16x16.mv[0] | a->l0.me16x16.mv[1]) & 1) == 0 )
+ if ( (*(uint32_t*)a->l0.me16x16.mv & 0x10001) == 0 )
{
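+ /* both components even (i.e. halfpel) iff (packed & 0x10001) == 0:
+  * bit 0 of each 16-bit half is that component's quarter-pel parity,
+  * so this equals the old ((mv[0]|mv[1]) & 1) == 0 on either endianness */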
/* l0 reference is halfpel, so get_ref on it will make it faster */
src2 =
switch( h->mb.i_sub_partition[i] )
{
case D_L0_8x8:
- x264_macroblock_cache_mv( h, x, y, 2, 2, 0, a->l0.me8x8[i].mv[0], a->l0.me8x8[i].mv[1] );
+ x264_macroblock_cache_mv_ptr( h, x, y, 2, 2, 0, a->l0.me8x8[i].mv );
break;
case D_L0_8x4:
- x264_macroblock_cache_mv( h, x, y+0, 2, 1, 0, a->l0.me8x4[i][0].mv[0], a->l0.me8x4[i][0].mv[1] );
- x264_macroblock_cache_mv( h, x, y+1, 2, 1, 0, a->l0.me8x4[i][1].mv[0], a->l0.me8x4[i][1].mv[1] );
+ x264_macroblock_cache_mv_ptr( h, x, y+0, 2, 1, 0, a->l0.me8x4[i][0].mv );
+ x264_macroblock_cache_mv_ptr( h, x, y+1, 2, 1, 0, a->l0.me8x4[i][1].mv );
break;
case D_L0_4x8:
- x264_macroblock_cache_mv( h, x+0, y, 1, 2, 0, a->l0.me4x8[i][0].mv[0], a->l0.me4x8[i][0].mv[1] );
- x264_macroblock_cache_mv( h, x+1, y, 1, 2, 0, a->l0.me4x8[i][1].mv[0], a->l0.me4x8[i][1].mv[1] );
+ x264_macroblock_cache_mv_ptr( h, x+0, y, 1, 2, 0, a->l0.me4x8[i][0].mv );
+ x264_macroblock_cache_mv_ptr( h, x+1, y, 1, 2, 0, a->l0.me4x8[i][1].mv );
break;
case D_L0_4x4:
- x264_macroblock_cache_mv( h, x+0, y+0, 1, 1, 0, a->l0.me4x4[i][0].mv[0], a->l0.me4x4[i][0].mv[1] );
- x264_macroblock_cache_mv( h, x+1, y+0, 1, 1, 0, a->l0.me4x4[i][1].mv[0], a->l0.me4x4[i][1].mv[1] );
- x264_macroblock_cache_mv( h, x+0, y+1, 1, 1, 0, a->l0.me4x4[i][2].mv[0], a->l0.me4x4[i][2].mv[1] );
- x264_macroblock_cache_mv( h, x+1, y+1, 1, 1, 0, a->l0.me4x4[i][3].mv[0], a->l0.me4x4[i][3].mv[1] );
+ x264_macroblock_cache_mv_ptr( h, x+0, y+0, 1, 1, 0, a->l0.me4x4[i][0].mv );
+ x264_macroblock_cache_mv_ptr( h, x+1, y+0, 1, 1, 0, a->l0.me4x4[i][1].mv );
+ x264_macroblock_cache_mv_ptr( h, x+0, y+1, 1, 1, 0, a->l0.me4x4[i][2].mv );
+ x264_macroblock_cache_mv_ptr( h, x+1, y+1, 1, 1, 0, a->l0.me4x4[i][3].mv );
break;
default:
x264_log( h, X264_LOG_ERROR, "internal error\n" );
if( x264_mb_partition_listX_table[0][part] ) \
{ \
x264_macroblock_cache_ref( h, x,y,dx,dy, 0, a->l0.i_ref ); \
- x264_macroblock_cache_mv( h, x,y,dx,dy, 0, me0.mv[0], me0.mv[1] ); \
+ x264_macroblock_cache_mv_ptr( h, x,y,dx,dy, 0, me0.mv ); \
} \
else \
{ \
x264_macroblock_cache_ref( h, x,y,dx,dy, 0, -1 ); \
- x264_macroblock_cache_mv( h, x,y,dx,dy, 0, 0, 0 ); \
+ x264_macroblock_cache_mv( h, x,y,dx,dy, 0, 0 ); \
if( b_mvd ) \
- x264_macroblock_cache_mvd( h, x,y,dx,dy, 0, 0, 0 ); \
+ x264_macroblock_cache_mvd( h, x,y,dx,dy, 0, 0 ); \
} \
if( x264_mb_partition_listX_table[1][part] ) \
{ \
x264_macroblock_cache_ref( h, x,y,dx,dy, 1, a->l1.i_ref ); \
- x264_macroblock_cache_mv( h, x,y,dx,dy, 1, me1.mv[0], me1.mv[1] ); \
+ x264_macroblock_cache_mv_ptr( h, x,y,dx,dy, 1, me1.mv ); \
} \
else \
{ \
x264_macroblock_cache_ref( h, x,y,dx,dy, 1, -1 ); \
- x264_macroblock_cache_mv( h, x,y,dx,dy, 1, 0, 0 ); \
+ x264_macroblock_cache_mv( h, x,y,dx,dy, 1, 0 ); \
if( b_mvd ) \
- x264_macroblock_cache_mvd( h, x,y,dx,dy, 1, 0, 0 ); \
+ x264_macroblock_cache_mvd( h, x,y,dx,dy, 1, 0 ); \
}
static inline void x264_mb_cache_mv_b8x8( x264_t *h, x264_mb_analysis_t *a, int i, int b_mvd )
x264_mb_load_mv_direct8x8( h, i );
if( b_mvd )
{
- x264_macroblock_cache_mvd( h, x, y, 2, 2, 0, 0, 0 );
- x264_macroblock_cache_mvd( h, x, y, 2, 2, 1, 0, 0 );
+ x264_macroblock_cache_mvd( h, x, y, 2, 2, 0, 0 );
+ x264_macroblock_cache_mvd( h, x, y, 2, 2, 1, 0 );
x264_macroblock_cache_skip( h, x, y, 2, 2, 1 );
}
}
x264_mb_predict_mv( h, l, 4*i, 2, m->mvp );
x264_me_search( h, m, &lX->me16x16.mv, 1 );
- x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, l, m->mv[0], m->mv[1] );
+ x264_macroblock_cache_mv_ptr( h, 2*x8, 2*y8, 2, 2, l, m->mv );
/* BI mode */
h->mc.mc_luma( pix[l], 8, m->p_fref, m->i_stride[0],
{ h->mb.pic.p_fref[0][a->l0.i_ref],
h->mb.pic.p_fref[1][a->l1.i_ref] };
DECLARE_ALIGNED_16( uint8_t pix[2][16*8] );
- DECLARE_ALIGNED_8( int mvc[2][2] );
+ DECLARE_ALIGNED_4( int16_t mvc[2][2] );
int i, l;
h->mb.i_partition = D_16x8;
LOAD_FENC( m, h->mb.pic.p_fenc, 0, 8*i );
LOAD_HPELS( m, p_fref[l], l, lX->i_ref, 0, 8*i );
- *(uint64_t*)mvc[0] = *(uint64_t*)lX->me8x8[2*i].mv;
- *(uint64_t*)mvc[1] = *(uint64_t*)lX->me8x8[2*i+1].mv;
+ *(uint32_t*)mvc[0] = *(uint32_t*)lX->me8x8[2*i].mv;
+ *(uint32_t*)mvc[1] = *(uint32_t*)lX->me8x8[2*i+1].mv;
x264_mb_predict_mv( h, l, 8*i, 2, m->mvp );
x264_me_search( h, m, mvc, 2 );
{ h->mb.pic.p_fref[0][a->l0.i_ref],
h->mb.pic.p_fref[1][a->l1.i_ref] };
DECLARE_ALIGNED_8( uint8_t pix[2][8*16] );
- DECLARE_ALIGNED_8( int mvc[2][2] );
+ DECLARE_ALIGNED_4( int16_t mvc[2][2] );
int i, l;
h->mb.i_partition = D_8x16;
LOAD_FENC( m, h->mb.pic.p_fenc, 8*i, 0 );
LOAD_HPELS( m, p_fref[l], l, lX->i_ref, 8*i, 0 );
- *(uint64_t*)mvc[0] = *(uint64_t*)lX->me8x8[i].mv;
- *(uint64_t*)mvc[1] = *(uint64_t*)lX->me8x8[i+2].mv;
+ *(uint32_t*)mvc[0] = *(uint32_t*)lX->me8x8[i].mv;
+ *(uint32_t*)mvc[1] = *(uint32_t*)lX->me8x8[i+2].mv;
x264_mb_predict_mv( h, l, 4*i, 2, m->mvp );
x264_me_search( h, m, mvc, 2 );
{
case D_16x16:
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.me16x16.i_ref );
- x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );
+ x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );
break;
case D_16x8:
x264_macroblock_cache_ref( h, 0, 0, 4, 2, 0, a->l0.me16x8[0].i_ref );
x264_macroblock_cache_ref( h, 0, 2, 4, 2, 0, a->l0.me16x8[1].i_ref );
- x264_macroblock_cache_mv ( h, 0, 0, 4, 2, 0, a->l0.me16x8[0].mv[0], a->l0.me16x8[0].mv[1] );
- x264_macroblock_cache_mv ( h, 0, 2, 4, 2, 0, a->l0.me16x8[1].mv[0], a->l0.me16x8[1].mv[1] );
+ x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 2, 0, a->l0.me16x8[0].mv );
+ x264_macroblock_cache_mv_ptr( h, 0, 2, 4, 2, 0, a->l0.me16x8[1].mv );
break;
case D_8x16:
x264_macroblock_cache_ref( h, 0, 0, 2, 4, 0, a->l0.me8x16[0].i_ref );
x264_macroblock_cache_ref( h, 2, 0, 2, 4, 0, a->l0.me8x16[1].i_ref );
- x264_macroblock_cache_mv ( h, 0, 0, 2, 4, 0, a->l0.me8x16[0].mv[0], a->l0.me8x16[0].mv[1] );
- x264_macroblock_cache_mv ( h, 2, 0, 2, 4, 0, a->l0.me8x16[1].mv[0], a->l0.me8x16[1].mv[1] );
+ x264_macroblock_cache_mv_ptr( h, 0, 0, 2, 4, 0, a->l0.me8x16[0].mv );
+ x264_macroblock_cache_mv_ptr( h, 2, 0, 2, 4, 0, a->l0.me8x16[1].mv );
break;
default:
{
h->mb.i_partition = D_16x16;
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, 0 );
- x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, h->mb.cache.pskip_mv[0],
- h->mb.cache.pskip_mv[1] );
+ x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, h->mb.cache.pskip_mv );
break;
}
{
case B_L0_L0:
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
- x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );
+ x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, -1 );
- x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, 0, 0 );
- x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 1, 0, 0 );
+ x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, 0 );
+ x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 1, 0 );
break;
case B_L1_L1:
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, -1 );
- x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, 0, 0 );
- x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 0, 0, 0 );
+ x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, 0 );
+ x264_macroblock_cache_mvd( h, 0, 0, 4, 4, 0, 0 );
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );
- x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, a->l1.me16x16.mv[0], a->l1.me16x16.mv[1] );
+ x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 1, a->l1.me16x16.mv );
break;
case B_BI_BI:
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, a->l0.i_ref );
- x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] );
+ x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, a->l1.i_ref );
- x264_macroblock_cache_mv ( h, 0, 0, 4, 4, 1, a->l1.me16x16.mv[0], a->l1.me16x16.mv[1] );
+ x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 1, a->l1.me16x16.mv );
break;
}
break;