From: Fiona Glaser
Date: Thu, 25 Feb 2010 04:51:43 +0000 (-0800)
Subject: Cut size of MVD arrays by a factor of 2 again
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=269b36dbd1e6355f750ef66894423a1189597ef9;p=libx264

Cut size of MVD arrays by a factor of 2 again

Only store the MVDs of the edges of each MB.
Thanks to Michael Niedermayer for the idea.
---

diff --git a/common/macroblock.c b/common/macroblock.c
index decc0319..fdfcb7b6 100644
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -712,8 +712,8 @@ int x264_macroblock_cache_init( x264_t *h )
     if( h->param.b_cabac )
     {
         CHECKED_MALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
-        CHECKED_MALLOC( h->mb.mvd[0], 2*16 * i_mb_count * sizeof(uint8_t) );
-        CHECKED_MALLOC( h->mb.mvd[1], 2*16 * i_mb_count * sizeof(uint8_t) );
+        CHECKED_MALLOC( h->mb.mvd[0], 2*8 * i_mb_count * sizeof(uint8_t) );
+        CHECKED_MALLOC( h->mb.mvd[1], 2*8 * i_mb_count * sizeof(uint8_t) );
     }
 
     for( i=0; i<2; i++ )
@@ -1211,25 +1211,20 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
                 if( h->param.b_cabac )
                 {
                     if( i_top_type >= 0 )
-                        CP64( h->mb.cache.mvd[i_list][x264_scan8[0] - 8], h->mb.mvd[i_list][i_top_4x4] );
+                        CP64( h->mb.cache.mvd[i_list][x264_scan8[0]-8], h->mb.mvd[i_list][i_top_xy*8] );
                     else
-                        M64( h->mb.cache.mvd[i_list][x264_scan8[0] - 8] ) = 0;
+                        M64( h->mb.cache.mvd[i_list][x264_scan8[0]-8] ) = 0;
 
                     if( i_left_type >= 0 )
                     {
-                        const int i8 = x264_scan8[0] - 1;
-                        const int iv = i_mb_4x4 - 1;
-                        CP16( h->mb.cache.mvd[i_list][i8+0*8], h->mb.mvd[i_list][iv + 0*s4x4] );
-                        CP16( h->mb.cache.mvd[i_list][i8+1*8], h->mb.mvd[i_list][iv + 1*s4x4] );
-                        CP16( h->mb.cache.mvd[i_list][i8+2*8], h->mb.mvd[i_list][iv + 2*s4x4] );
-                        CP16( h->mb.cache.mvd[i_list][i8+3*8], h->mb.mvd[i_list][iv + 3*s4x4] );
+                        CP16( h->mb.cache.mvd[i_list][x264_scan8[0]-1+0*8], h->mb.mvd[i_list][i_left_xy*8+4] );
+                        CP16( h->mb.cache.mvd[i_list][x264_scan8[0]-1+1*8], h->mb.mvd[i_list][i_left_xy*8+5] );
+                        CP16( h->mb.cache.mvd[i_list][x264_scan8[0]-1+2*8], h->mb.mvd[i_list][i_left_xy*8+6] );
+                        CP16( h->mb.cache.mvd[i_list][x264_scan8[0]-1+3*8], h->mb.mvd[i_list][i_left_xy*8+3] );
                     }
                     else
-                    {
-                        const int i8 = x264_scan8[0] - 1;
                         for( i = 0; i < 4; i++ )
-                            M16( h->mb.cache.mvd[i_list][i8+i*8] ) = 0;
-                    }
+                            M16( h->mb.cache.mvd[i_list][x264_scan8[0]-1+i*8] ) = 0;
                 }
             }
 
@@ -1406,19 +1401,27 @@ void x264_macroblock_cache_save( x264_t *h )
 
         if( !IS_INTRA( i_mb_type ) && !IS_SKIP( i_mb_type ) && !IS_DIRECT( i_mb_type ) )
         {
-            for( y = 0; y < 4; y++ )
-                CP64( h->mb.mvd[0][i_mb_4x4+y*s4x4], h->mb.cache.mvd[0][x264_scan8[0]+8*y] );
+            CP64( h->mb.mvd[0][i_mb_xy*8+0], h->mb.cache.mvd[0][x264_scan8[0]+8*3] );
+            CP16( h->mb.mvd[0][i_mb_xy*8+4], h->mb.cache.mvd[0][x264_scan8[0]+8*0+3] );
+            CP16( h->mb.mvd[0][i_mb_xy*8+5], h->mb.cache.mvd[0][x264_scan8[0]+8*1+3] );
+            CP16( h->mb.mvd[0][i_mb_xy*8+6], h->mb.cache.mvd[0][x264_scan8[0]+8*2+3] );
             if( h->sh.i_type == SLICE_TYPE_B )
-                for( y = 0; y < 4; y++ )
-                    CP64( h->mb.mvd[1][i_mb_4x4+y*s4x4], h->mb.cache.mvd[1][x264_scan8[0]+8*y] );
+            {
+                CP64( h->mb.mvd[1][i_mb_xy*8+0], h->mb.cache.mvd[1][x264_scan8[0]+8*3] );
+                CP16( h->mb.mvd[1][i_mb_xy*8+4], h->mb.cache.mvd[1][x264_scan8[0]+8*0+3] );
+                CP16( h->mb.mvd[1][i_mb_xy*8+5], h->mb.cache.mvd[1][x264_scan8[0]+8*1+3] );
+                CP16( h->mb.mvd[1][i_mb_xy*8+6], h->mb.cache.mvd[1][x264_scan8[0]+8*2+3] );
+            }
         }
         else
         {
-            for( y = 0; y < 4; y++ )
-                M64( h->mb.mvd[0][i_mb_4x4+y*s4x4] ) = 0;
+            M64( h->mb.mvd[0][i_mb_xy*8+0] ) = 0;
+            M64( h->mb.mvd[0][i_mb_xy*8+4] ) = 0;
             if( h->sh.i_type == SLICE_TYPE_B )
-                for( y = 0; y < 4; y++ )
-                    M64( h->mb.mvd[1][i_mb_4x4+y*s4x4] ) = 0;
+            {
+                M64( h->mb.mvd[1][i_mb_xy*8+0] ) = 0;
+                M64( h->mb.mvd[1][i_mb_xy*8+4] ) = 0;
+            }
         }
 
         if( h->sh.i_type == SLICE_TYPE_B )