granicus.if.org Git - libx264/commitdiff
MB-tree fixes:
author: Loren Merritt <pengvado@akuvian.org>
Sat, 8 Aug 2009 14:53:27 +0000 (14:53 +0000)
committer: Loren Merritt <pengvado@akuvian.org>
Sun, 9 Aug 2009 09:34:26 +0000 (09:34 +0000)
AQ was applied inconsistently, with some AQed costs compared to other non-AQed costs. Strangely enough, fixing this increases SSIM on some sources but decreases it on others. More investigation needed.
Account for weighted bipred.
Reduce memory, increase precision, simplify, and early terminate.

common/frame.c
common/frame.h
encoder/slicetype.c

index b642717d15d378d4651ca7a9153e56e408361988..90dd070500fa2cefe6ef1c70128e6a9cc0635f2f 100644 (file)
@@ -95,7 +95,7 @@ x264_frame_t *x264_frame_new( x264_t *h )
             }
         CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
         memset( frame->i_intra_cost, -1, i_mb_count * sizeof(uint16_t) );
-        CHECKED_MALLOC( frame->i_propagate_cost, i_mb_count * sizeof(uint32_t) );
+        CHECKED_MALLOC( frame->i_propagate_cost, i_mb_count * sizeof(uint16_t) );
         for( j = 0; j <= h->param.i_bframe+1; j++ )
             for( i = 0; i <= h->param.i_bframe+1; i++ )
             {
index a3da4e40b6041b46c17089e373a9c1a5c6341137..f70d38ab01f368458a48f99520f747e2829079d0 100644 (file)
@@ -85,7 +85,7 @@ typedef struct
     float   *f_qp_offset;
     int     b_intra_calculated;
     uint16_t *i_intra_cost;
-    uint32_t *i_propagate_cost;
+    uint16_t *i_propagate_cost;
     uint16_t *i_inv_qscale_factor;
 
     /* threading */
index be32e05647cf0f2e4dea36c5854bbd00835443a5..7b7a4d070cf27e5d26c094b7200f2e7f50f06975 100644 (file)
@@ -403,41 +403,32 @@ static int x264_slicetype_frame_cost_recalculate( x264_t *h, x264_mb_analysis_t
 static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, int p0, int p1, int b )
 {
     x264_frame_t *refs[2] = {frames[p0],frames[p1]};
-    int dist_scale_factor = p1 != p0 ? 128 : ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0);
+    int dist_scale_factor = ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0);
     int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32;
+    int16_t (*mvs[2])[2] = { frames[b]->lowres_mvs[0][b-p0-1], frames[b]->lowres_mvs[1][p1-b-1] };
 
     for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
     {
-        for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
+        int mb_index = h->mb.i_mb_y*h->mb.i_mb_stride;
+        for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++, mb_index++ )
         {
-            int mb_index = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
             int inter_cost = frames[b]->lowres_costs[b-p0][p1-b][mb_index];
-            int intra_cost = (frames[b]->i_intra_cost[mb_index] * frames[b]->i_inv_qscale_factor[mb_index]+128)>>8;
-            int lists_used = frames[b]->lowres_inter_types[b-p0][p1-b][mb_index];
-            /* The approximate amount of data that this block contains. */
-            int propagate_amount = intra_cost + frames[b]->i_propagate_cost[mb_index];
-
-            /* Divide by 64 for per-pixel summing. */
-            propagate_amount = (((uint64_t)propagate_amount*(intra_cost-inter_cost)) / intra_cost + 32) >> 6;
+            int intra_cost = frames[b]->i_intra_cost[mb_index];
 
             /* Don't propagate for an intra block. */
             if( inter_cost < intra_cost )
             {
-                int mv[2][2], list;
-                mv[0][0] = frames[b]->lowres_mvs[0][b-p0-1][mb_index][0];
-                mv[0][1] = frames[b]->lowres_mvs[0][b-p0-1][mb_index][1];
-                if( b != p1 )
-                {
-                    mv[1][0] = frames[b]->lowres_mvs[1][p1-b-1][mb_index][0];
-                    mv[1][1] = frames[b]->lowres_mvs[1][p1-b-1][mb_index][1];
-                }
-
+                int lists_used = frames[b]->lowres_inter_types[b-p0][p1-b][mb_index];
+                /* The approximate amount of data that this block contains. */
+                int propagate_amount = frames[b]->i_propagate_cost[mb_index] + ((intra_cost * frames[b]->i_inv_qscale_factor[mb_index] + 128)>>8);
+                propagate_amount = ((uint64_t)propagate_amount*(intra_cost-inter_cost)) / intra_cost;
+                int list;
                 /* Follow the MVs to the previous frame(s). */
                 for( list = 0; list < 2; list++ )
                     if( (lists_used >> list)&1 )
                     {
-                        int x = mv[list][0];
-                        int y = mv[list][1];
+                        int x = mvs[list][mb_index][0];
+                        int y = mvs[list][mb_index][1];
                         int listamount = propagate_amount;
                         int mbx = (x>>5)+h->mb.i_mb_x;
                         int mby = ((y>>5)+h->mb.i_mb_y);
@@ -445,10 +436,12 @@ static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, in
                         int idx1 = idx0 + 1;
                         int idx2 = idx0 + h->mb.i_mb_stride;
                         int idx3 = idx0 + h->mb.i_mb_stride + 1;
-                        int idx0weight = (32-(y&31))*(32-(x&31));
-                        int idx1weight = (32-(y&31))*(x&31);
-                        int idx2weight = (y&31)*(32-(x&31));
-                        int idx3weight = (y&31)*(x&31);
+                        x &= 31;
+                        y &= 31;
+                        int idx0weight = (32-y)*(32-x);
+                        int idx1weight = (32-y)*x;
+                        int idx2weight = y*(32-x);
+                        int idx3weight = y*x;
 
                         /* Apply bipred weighting. */
                         if( lists_used == 3 )
@@ -460,21 +453,21 @@ static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, in
                          * be counted. */
                         if( mbx < h->sps->i_mb_width-1 && mby < h->sps->i_mb_height-1 && mbx >= 0 && mby >= 0 )
                         {
-                            CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+8)>>4 );
-                            CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+8)>>4 );
-                            CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+8)>>4 );
-                            CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+8)>>4 );
+                            CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+512)>>10 );
+                            CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+512)>>10 );
+                            CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+512)>>10 );
+                            CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+512)>>10 );
                         }
                         else /* Check offsets individually */
                         {
                             if( mbx < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx >= 0 && mby >= 0 )
-                                CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+8)>>4 );
+                                CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+512)>>10 );
                             if( mbx+1 < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx+1 >= 0 && mby >= 0 )
-                                CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+8)>>4 );
+                                CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+512)>>10 );
                             if( mbx < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx >= 0 && mby+1 >= 0 )
-                                CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+8)>>4 );
+                                CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+512)>>10 );
                             if( mbx+1 < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx+1 >= 0 && mby+1 >= 0 )
-                                CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+8)>>4 );
+                                CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+512)>>10 );
                         }
                     }
             }
@@ -497,7 +490,7 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
     if( last_nonb < 0 )
         return;
 
-    memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) );
+    memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
     while( i-- > idx )
     {
         cur_nonb = i;
@@ -506,12 +499,12 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
         if( cur_nonb < idx )
             break;
         x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, last_nonb, 0 );
-        memset( frames[cur_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) );
+        memset( frames[cur_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
         x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, last_nonb );
         while( frames[i]->i_type == X264_TYPE_B && i > 0 )
         {
             x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, i, 0 );
-            memset( frames[i]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) );
+            memset( frames[i]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
             x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, i );
             i--;
         }