granicus.if.org Git - libx264/commitdiff
Cut size of MVD arrays by a factor of 2 again
author: Fiona Glaser <fiona@x264.com>
Thu, 25 Feb 2010 04:51:43 +0000 (20:51 -0800)
committer: Fiona Glaser <fiona@x264.com>
Thu, 25 Feb 2010 04:51:43 +0000 (20:51 -0800)
Only store the MVDs of the edges of each MB.

Thanks to Michael Niedermayer for the idea.

common/macroblock.c

index decc0319bea8f27f570a5f2f42f6333cf063967c..fdfcb7b625071b96b7232471ca1872f97776dbfc 100644 (file)
@@ -712,8 +712,8 @@ int x264_macroblock_cache_init( x264_t *h )
     if( h->param.b_cabac )
     {
         CHECKED_MALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
-        CHECKED_MALLOC( h->mb.mvd[0], 2*16 * i_mb_count * sizeof(uint8_t) );
-        CHECKED_MALLOC( h->mb.mvd[1], 2*16 * i_mb_count * sizeof(uint8_t) );
+        CHECKED_MALLOC( h->mb.mvd[0], 2*8 * i_mb_count * sizeof(uint8_t) );
+        CHECKED_MALLOC( h->mb.mvd[1], 2*8 * i_mb_count * sizeof(uint8_t) );
     }
 
     for( i=0; i<2; i++ )
@@ -1211,25 +1211,20 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
             if( h->param.b_cabac )
             {
                 if( i_top_type >= 0 )
-                    CP64( h->mb.cache.mvd[i_list][x264_scan8[0] - 8], h->mb.mvd[i_list][i_top_4x4] );
+                    CP64( h->mb.cache.mvd[i_list][x264_scan8[0]-8], h->mb.mvd[i_list][i_top_xy*8] );
                 else
-                    M64( h->mb.cache.mvd[i_list][x264_scan8[0] - 8] ) = 0;
+                    M64( h->mb.cache.mvd[i_list][x264_scan8[0]-8] ) = 0;
 
                 if( i_left_type >= 0 )
                 {
-                    const int i8 = x264_scan8[0] - 1;
-                    const int iv = i_mb_4x4 - 1;
-                    CP16( h->mb.cache.mvd[i_list][i8+0*8], h->mb.mvd[i_list][iv + 0*s4x4] );
-                    CP16( h->mb.cache.mvd[i_list][i8+1*8], h->mb.mvd[i_list][iv + 1*s4x4] );
-                    CP16( h->mb.cache.mvd[i_list][i8+2*8], h->mb.mvd[i_list][iv + 2*s4x4] );
-                    CP16( h->mb.cache.mvd[i_list][i8+3*8], h->mb.mvd[i_list][iv + 3*s4x4] );
+                    CP16( h->mb.cache.mvd[i_list][x264_scan8[0]-1+0*8], h->mb.mvd[i_list][i_left_xy*8+4] );
+                    CP16( h->mb.cache.mvd[i_list][x264_scan8[0]-1+1*8], h->mb.mvd[i_list][i_left_xy*8+5] );
+                    CP16( h->mb.cache.mvd[i_list][x264_scan8[0]-1+2*8], h->mb.mvd[i_list][i_left_xy*8+6] );
+                    CP16( h->mb.cache.mvd[i_list][x264_scan8[0]-1+3*8], h->mb.mvd[i_list][i_left_xy*8+3] );
                 }
                 else
-                {
-                    const int i8 = x264_scan8[0] - 1;
                     for( i = 0; i < 4; i++ )
-                        M16( h->mb.cache.mvd[i_list][i8+i*8] ) = 0;
-                }
+                        M16( h->mb.cache.mvd[i_list][x264_scan8[0]-1+i*8] ) = 0;
             }
         }
 
@@ -1406,19 +1401,27 @@ void x264_macroblock_cache_save( x264_t *h )
 
         if( !IS_INTRA( i_mb_type ) && !IS_SKIP( i_mb_type ) && !IS_DIRECT( i_mb_type ) )
         {
-            for( y = 0; y < 4; y++ )
-                CP64( h->mb.mvd[0][i_mb_4x4+y*s4x4], h->mb.cache.mvd[0][x264_scan8[0]+8*y] );
+            CP64( h->mb.mvd[0][i_mb_xy*8+0], h->mb.cache.mvd[0][x264_scan8[0]+8*3] );
+            CP16( h->mb.mvd[0][i_mb_xy*8+4], h->mb.cache.mvd[0][x264_scan8[0]+8*0+3] );
+            CP16( h->mb.mvd[0][i_mb_xy*8+5], h->mb.cache.mvd[0][x264_scan8[0]+8*1+3] );
+            CP16( h->mb.mvd[0][i_mb_xy*8+6], h->mb.cache.mvd[0][x264_scan8[0]+8*2+3] );
             if( h->sh.i_type == SLICE_TYPE_B )
-                for( y = 0; y < 4; y++ )
-                    CP64( h->mb.mvd[1][i_mb_4x4+y*s4x4], h->mb.cache.mvd[1][x264_scan8[0]+8*y] );
+            {
+                CP64( h->mb.mvd[1][i_mb_xy*8+0], h->mb.cache.mvd[1][x264_scan8[0]+8*3] );
+                CP16( h->mb.mvd[1][i_mb_xy*8+4], h->mb.cache.mvd[1][x264_scan8[0]+8*0+3] );
+                CP16( h->mb.mvd[1][i_mb_xy*8+5], h->mb.cache.mvd[1][x264_scan8[0]+8*1+3] );
+                CP16( h->mb.mvd[1][i_mb_xy*8+6], h->mb.cache.mvd[1][x264_scan8[0]+8*2+3] );
+            }
         }
         else
         {
-            for( y = 0; y < 4; y++ )
-                M64( h->mb.mvd[0][i_mb_4x4+y*s4x4] ) = 0;
+            M64( h->mb.mvd[0][i_mb_xy*8+0] ) = 0;
+            M64( h->mb.mvd[0][i_mb_xy*8+4] ) = 0;
             if( h->sh.i_type == SLICE_TYPE_B )
-                for( y = 0; y < 4; y++ )
-                    M64( h->mb.mvd[1][i_mb_4x4+y*s4x4] ) = 0;
+            {
+                M64( h->mb.mvd[1][i_mb_xy*8+0] ) = 0;
+                M64( h->mb.mvd[1][i_mb_xy*8+4] ) = 0;
+            }
         }
 
         if( h->sh.i_type == SLICE_TYPE_B )